Skip to content

Commit 15004f5

Browse files
committed
[VectorCombine] Combine extract/insert from vector
insert (DstVec, (extract SrcVec, ExtIdx), InsIdx) --> shuffle (DstVec, SrcVec, Mask). This commit folds an extractelement/insertelement pair between two fixed-width vectors of the same type into a single shufflevector.
1 parent 32b55f3 commit 15004f5

File tree

5 files changed

+212
-84
lines changed

5 files changed

+212
-84
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ class VectorCombine {
106106
Instruction &I);
107107
bool foldExtractExtract(Instruction &I);
108108
bool foldInsExtFNeg(Instruction &I);
109+
bool foldInsExtVectorToShuffle(Instruction &I);
109110
bool foldBitcastShuffle(Instruction &I);
110111
bool scalarizeBinopOrCmp(Instruction &I);
111112
bool scalarizeVPIntrinsic(Instruction &I);
@@ -2678,6 +2679,48 @@ bool VectorCombine::shrinkType(llvm::Instruction &I) {
26782679
return true;
26792680
}
26802681

2682+
/// insert (DstVec, (extract SrcVec, ExtIdx), InsIdx) -->
2683+
/// shuffle (DstVec, SrcVec, Mask)
2684+
bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
2685+
Value *DstVec, *SrcVec;
2686+
uint64_t ExtIdx, InsIdx;
2687+
if (!match(&I, m_InsertElt(m_Value(DstVec),
2688+
m_OneUse(m_ExtractElt(m_Value(SrcVec),
2689+
m_ConstantInt(ExtIdx))),
2690+
m_ConstantInt(InsIdx))))
2691+
return false;
2692+
2693+
auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
2694+
if (!VecTy || SrcVec->getType() != VecTy)
2695+
return false;
2696+
2697+
unsigned NumElts = VecTy->getNumElements();
2698+
if (ExtIdx >= NumElts)
2699+
return false;
2700+
2701+
SmallVector<int> Mask(NumElts);
2702+
std::iota(Mask.begin(), Mask.end(), 0);
2703+
Mask[InsIdx] = ExtIdx + NumElts;
2704+
// Cost
2705+
ExtractElementInst *Ext;
2706+
if ((Ext = dyn_cast<ExtractElementInst>(I.getOperand(0))) == nullptr)
2707+
Ext = dyn_cast<ExtractElementInst>(I.getOperand(1));
2708+
2709+
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
2710+
InstructionCost OldCost =
2711+
TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx);
2712+
InstructionCost NewCost =
2713+
TTI.getShuffleCost(TargetTransformInfo::SK_Select, VecTy, Mask);
2714+
2715+
if (OldCost < NewCost)
2716+
return false;
2717+
2718+
Value *Shuf = Builder.CreateShuffleVector(DstVec, SrcVec, Mask);
2719+
replaceValue(I, *Shuf);
2720+
2721+
return true;
2722+
}
2723+
26812724
/// This is the entry point for all transforms. Pass manager differences are
26822725
/// handled in the callers of this function.
26832726
bool VectorCombine::run() {
@@ -2734,6 +2777,7 @@ bool VectorCombine::run() {
27342777
switch (Opcode) {
27352778
case Instruction::InsertElement:
27362779
MadeChange |= foldInsExtFNeg(I);
2780+
MadeChange |= foldInsExtVectorToShuffle(I);
27372781
break;
27382782
case Instruction::ShuffleVector:
27392783
MadeChange |= foldShuffleOfBinops(I);

llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll

Lines changed: 58 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -417,12 +417,18 @@ define float @ext14_ext15_fmul_v16f32(<16 x float> %x) {
417417
}
418418

419419
define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
420-
; CHECK-LABEL: @ins_bo_ext_ext(
421-
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
422-
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
423-
; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
424-
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
425-
; CHECK-NEXT: ret <4 x float> [[V3]]
420+
; SSE-LABEL: @ins_bo_ext_ext(
421+
; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
422+
; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
423+
; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
424+
; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
425+
; SSE-NEXT: ret <4 x float> [[V3]]
426+
;
427+
; AVX-LABEL: @ins_bo_ext_ext(
428+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
429+
; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
430+
; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
431+
; AVX-NEXT: ret <4 x float> [[V3]]
426432
;
427433
%a2 = extractelement <4 x float> %a, i32 2
428434
%a3 = extractelement <4 x float> %a, i32 3
@@ -435,13 +441,21 @@ define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
435441
; but it is likely that extracting from index 3 is the better option.
436442

437443
define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
438-
; CHECK-LABEL: @ins_bo_ext_ext_uses(
439-
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
440-
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
441-
; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
442-
; CHECK-NEXT: call void @use_f32(float [[A23]])
443-
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
444-
; CHECK-NEXT: ret <4 x float> [[V3]]
444+
; SSE-LABEL: @ins_bo_ext_ext_uses(
445+
; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
446+
; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
447+
; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
448+
; SSE-NEXT: call void @use_f32(float [[A23]])
449+
; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
450+
; SSE-NEXT: ret <4 x float> [[V3]]
451+
;
452+
; AVX-LABEL: @ins_bo_ext_ext_uses(
453+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
454+
; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
455+
; AVX-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
456+
; AVX-NEXT: call void @use_f32(float [[A23]])
457+
; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 6>
458+
; AVX-NEXT: ret <4 x float> [[V3]]
445459
;
446460
%a2 = extractelement <4 x float> %a, i32 2
447461
%a3 = extractelement <4 x float> %a, i32 3
@@ -452,22 +466,37 @@ define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
452466
}
453467

454468
define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
455-
; CHECK-LABEL: @PR34724(
456-
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
457-
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
458-
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
459-
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
460-
; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
461-
; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
462-
; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
463-
; CHECK-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
464-
; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
465-
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
466-
; CHECK-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
467-
; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> poison, float [[A23]], i32 1
468-
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
469-
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[B23]], i32 3
470-
; CHECK-NEXT: ret <4 x float> [[V3]]
469+
; SSE-LABEL: @PR34724(
470+
; SSE-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
471+
; SSE-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
472+
; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
473+
; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
474+
; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
475+
; SSE-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
476+
; SSE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
477+
; SSE-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
478+
; SSE-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
479+
; SSE-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
480+
; SSE-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
481+
; SSE-NEXT: [[V1:%.*]] = insertelement <4 x float> poison, float [[A23]], i32 1
482+
; SSE-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
483+
; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[B23]], i32 3
484+
; SSE-NEXT: ret <4 x float> [[V3]]
485+
;
486+
; AVX-LABEL: @PR34724(
487+
; AVX-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
488+
; AVX-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
489+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
490+
; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
491+
; AVX-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
492+
; AVX-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
493+
; AVX-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
494+
; AVX-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
495+
; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
496+
; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 6, i32 2, i32 3>
497+
; AVX-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
498+
; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
499+
; AVX-NEXT: ret <4 x float> [[V3]]
471500
;
472501
%a0 = extractelement <4 x float> %a, i32 0
473502
%a1 = extractelement <4 x float> %a, i32 1

llvm/test/Transforms/VectorCombine/X86/extract-binop.ll

Lines changed: 58 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -417,12 +417,18 @@ define float @ext14_ext15_fmul_v16f32(<16 x float> %x) {
417417
}
418418

419419
define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
420-
; CHECK-LABEL: @ins_bo_ext_ext(
421-
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
422-
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
423-
; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
424-
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
425-
; CHECK-NEXT: ret <4 x float> [[V3]]
420+
; SSE-LABEL: @ins_bo_ext_ext(
421+
; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
422+
; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
423+
; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
424+
; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
425+
; SSE-NEXT: ret <4 x float> [[V3]]
426+
;
427+
; AVX-LABEL: @ins_bo_ext_ext(
428+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
429+
; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
430+
; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
431+
; AVX-NEXT: ret <4 x float> [[V3]]
426432
;
427433
%a2 = extractelement <4 x float> %a, i32 2
428434
%a3 = extractelement <4 x float> %a, i32 3
@@ -435,13 +441,21 @@ define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
435441
; but it is likely that extracting from index 3 is the better option.
436442

437443
define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
438-
; CHECK-LABEL: @ins_bo_ext_ext_uses(
439-
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
440-
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
441-
; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
442-
; CHECK-NEXT: call void @use_f32(float [[A23]])
443-
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
444-
; CHECK-NEXT: ret <4 x float> [[V3]]
444+
; SSE-LABEL: @ins_bo_ext_ext_uses(
445+
; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
446+
; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
447+
; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
448+
; SSE-NEXT: call void @use_f32(float [[A23]])
449+
; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
450+
; SSE-NEXT: ret <4 x float> [[V3]]
451+
;
452+
; AVX-LABEL: @ins_bo_ext_ext_uses(
453+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
454+
; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
455+
; AVX-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
456+
; AVX-NEXT: call void @use_f32(float [[A23]])
457+
; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 6>
458+
; AVX-NEXT: ret <4 x float> [[V3]]
445459
;
446460
%a2 = extractelement <4 x float> %a, i32 2
447461
%a3 = extractelement <4 x float> %a, i32 3
@@ -452,22 +466,37 @@ define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
452466
}
453467

454468
define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
455-
; CHECK-LABEL: @PR34724(
456-
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
457-
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
458-
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
459-
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
460-
; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
461-
; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
462-
; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
463-
; CHECK-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
464-
; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
465-
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
466-
; CHECK-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
467-
; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> undef, float [[A23]], i32 1
468-
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
469-
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[B23]], i32 3
470-
; CHECK-NEXT: ret <4 x float> [[V3]]
469+
; SSE-LABEL: @PR34724(
470+
; SSE-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
471+
; SSE-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
472+
; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
473+
; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
474+
; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
475+
; SSE-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
476+
; SSE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
477+
; SSE-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
478+
; SSE-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
479+
; SSE-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
480+
; SSE-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
481+
; SSE-NEXT: [[V1:%.*]] = insertelement <4 x float> undef, float [[A23]], i32 1
482+
; SSE-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
483+
; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[B23]], i32 3
484+
; SSE-NEXT: ret <4 x float> [[V3]]
485+
;
486+
; AVX-LABEL: @PR34724(
487+
; AVX-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
488+
; AVX-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
489+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
490+
; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
491+
; AVX-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
492+
; AVX-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
493+
; AVX-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
494+
; AVX-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
495+
; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
496+
; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 6, i32 2, i32 3>
497+
; AVX-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
498+
; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
499+
; AVX-NEXT: ret <4 x float> [[V3]]
471500
;
472501
%a0 = extractelement <4 x float> %a, i32 0
473502
%a1 = extractelement <4 x float> %a, i32 1

0 commit comments

Comments
 (0)