Skip to content

Commit a89787f

Browse files
committed
[VectorCombine] Combine extract/insert from vector
insert (DstVec, (extract SrcVec, ExtIdx), InsIdx) --> shuffle (DstVec, SrcVec, Mask). This commit combines an extractelement/insertelement pair on vectors of the same type into a single two-source shufflevector.
1 parent 32b55f3 commit a89787f

File tree

6 files changed

+183
-72
lines changed

6 files changed

+183
-72
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ class VectorCombine {
106106
Instruction &I);
107107
bool foldExtractExtract(Instruction &I);
108108
bool foldInsExtFNeg(Instruction &I);
109+
bool foldInsExtVectorToShuffle(Instruction &I);
109110
bool foldBitcastShuffle(Instruction &I);
110111
bool scalarizeBinopOrCmp(Instruction &I);
111112
bool scalarizeVPIntrinsic(Instruction &I);
@@ -2678,6 +2679,48 @@ bool VectorCombine::shrinkType(llvm::Instruction &I) {
26782679
return true;
26792680
}
26802681

2682+
/// insert (DstVec, (extract SrcVec, ExtIdx), InsIdx) -->
2683+
/// shuffle (DstVec, SrcVec, Mask)
2684+
bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
2685+
Value *DstVec, *SrcVec;
2686+
uint64_t ExtIdx, InsIdx;
2687+
if (!match(&I, m_InsertElt(m_Value(DstVec),
2688+
m_OneUse(m_ExtractElt(m_Value(SrcVec),
2689+
m_ConstantInt(ExtIdx))),
2690+
m_ConstantInt(InsIdx))))
2691+
return false;
2692+
2693+
auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
2694+
if (!VecTy || SrcVec->getType() != VecTy)
2695+
return false;
2696+
2697+
unsigned NumElts = VecTy->getNumElements();
2698+
if (ExtIdx >= NumElts)
2699+
return false;
2700+
2701+
SmallVector<int> Mask(NumElts);
2702+
std::iota(Mask.begin(), Mask.end(), 0);
2703+
Mask[InsIdx] = ExtIdx + NumElts;
2704+
// Cost
2705+
ExtractElementInst *Ext;
2706+
if ((Ext = dyn_cast<ExtractElementInst>(I.getOperand(0))) == nullptr)
2707+
Ext = dyn_cast<ExtractElementInst>(I.getOperand(1));
2708+
2709+
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
2710+
InstructionCost OldCost =
2711+
TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx);
2712+
InstructionCost NewCost =
2713+
TTI.getShuffleCost(TargetTransformInfo::SK_Select, VecTy, Mask);
2714+
2715+
if (OldCost < NewCost)
2716+
return false;
2717+
2718+
Value *Shuf = Builder.CreateShuffleVector(DstVec, SrcVec, Mask);
2719+
replaceValue(I, *Shuf);
2720+
2721+
return true;
2722+
}
2723+
26812724
/// This is the entry point for all transforms. Pass manager differences are
26822725
/// handled in the callers of this function.
26832726
bool VectorCombine::run() {
@@ -2734,6 +2777,7 @@ bool VectorCombine::run() {
27342777
switch (Opcode) {
27352778
case Instruction::InsertElement:
27362779
MadeChange |= foldInsExtFNeg(I);
2780+
MadeChange |= foldInsExtVectorToShuffle(I);
27372781
break;
27382782
case Instruction::ShuffleVector:
27392783
MadeChange |= foldShuffleOfBinops(I);

llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll

Lines changed: 43 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -417,12 +417,18 @@ define float @ext14_ext15_fmul_v16f32(<16 x float> %x) {
417417
}
418418

419419
define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
420-
; CHECK-LABEL: @ins_bo_ext_ext(
421-
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
422-
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
423-
; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
424-
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
425-
; CHECK-NEXT: ret <4 x float> [[V3]]
420+
; SSE-LABEL: @ins_bo_ext_ext(
421+
; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
422+
; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
423+
; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
424+
; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
425+
; SSE-NEXT: ret <4 x float> [[V3]]
426+
;
427+
; AVX-LABEL: @ins_bo_ext_ext(
428+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
429+
; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
430+
; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
431+
; AVX-NEXT: ret <4 x float> [[V3]]
426432
;
427433
%a2 = extractelement <4 x float> %a, i32 2
428434
%a3 = extractelement <4 x float> %a, i32 3
@@ -452,22 +458,37 @@ define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
452458
}
453459

454460
define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
455-
; CHECK-LABEL: @PR34724(
456-
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
457-
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
458-
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
459-
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
460-
; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
461-
; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
462-
; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
463-
; CHECK-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
464-
; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
465-
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
466-
; CHECK-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
467-
; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> poison, float [[A23]], i32 1
468-
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
469-
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[B23]], i32 3
470-
; CHECK-NEXT: ret <4 x float> [[V3]]
461+
; SSE-LABEL: @PR34724(
462+
; SSE-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
463+
; SSE-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
464+
; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
465+
; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
466+
; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
467+
; SSE-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
468+
; SSE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
469+
; SSE-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
470+
; SSE-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
471+
; SSE-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
472+
; SSE-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
473+
; SSE-NEXT: [[V1:%.*]] = insertelement <4 x float> poison, float [[A23]], i32 1
474+
; SSE-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
475+
; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[B23]], i32 3
476+
; SSE-NEXT: ret <4 x float> [[V3]]
477+
;
478+
; AVX-LABEL: @PR34724(
479+
; AVX-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
480+
; AVX-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
481+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
482+
; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
483+
; AVX-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
484+
; AVX-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
485+
; AVX-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
486+
; AVX-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
487+
; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
488+
; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 6, i32 2, i32 3>
489+
; AVX-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
490+
; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
491+
; AVX-NEXT: ret <4 x float> [[V3]]
471492
;
472493
%a0 = extractelement <4 x float> %a, i32 0
473494
%a1 = extractelement <4 x float> %a, i32 1

llvm/test/Transforms/VectorCombine/X86/extract-binop.ll

Lines changed: 43 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -417,12 +417,18 @@ define float @ext14_ext15_fmul_v16f32(<16 x float> %x) {
417417
}
418418

419419
define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
420-
; CHECK-LABEL: @ins_bo_ext_ext(
421-
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
422-
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
423-
; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
424-
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
425-
; CHECK-NEXT: ret <4 x float> [[V3]]
420+
; SSE-LABEL: @ins_bo_ext_ext(
421+
; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
422+
; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
423+
; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
424+
; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
425+
; SSE-NEXT: ret <4 x float> [[V3]]
426+
;
427+
; AVX-LABEL: @ins_bo_ext_ext(
428+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
429+
; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
430+
; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
431+
; AVX-NEXT: ret <4 x float> [[V3]]
426432
;
427433
%a2 = extractelement <4 x float> %a, i32 2
428434
%a3 = extractelement <4 x float> %a, i32 3
@@ -452,22 +458,37 @@ define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
452458
}
453459

454460
define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
455-
; CHECK-LABEL: @PR34724(
456-
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
457-
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
458-
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
459-
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
460-
; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
461-
; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
462-
; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
463-
; CHECK-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
464-
; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
465-
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
466-
; CHECK-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
467-
; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> undef, float [[A23]], i32 1
468-
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
469-
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[B23]], i32 3
470-
; CHECK-NEXT: ret <4 x float> [[V3]]
461+
; SSE-LABEL: @PR34724(
462+
; SSE-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
463+
; SSE-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
464+
; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
465+
; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
466+
; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
467+
; SSE-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
468+
; SSE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
469+
; SSE-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
470+
; SSE-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
471+
; SSE-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
472+
; SSE-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
473+
; SSE-NEXT: [[V1:%.*]] = insertelement <4 x float> undef, float [[A23]], i32 1
474+
; SSE-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
475+
; SSE-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[B23]], i32 3
476+
; SSE-NEXT: ret <4 x float> [[V3]]
477+
;
478+
; AVX-LABEL: @PR34724(
479+
; AVX-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
480+
; AVX-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
481+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
482+
; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
483+
; AVX-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
484+
; AVX-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
485+
; AVX-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
486+
; AVX-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
487+
; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
488+
; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 6, i32 2, i32 3>
489+
; AVX-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
490+
; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
491+
; AVX-NEXT: ret <4 x float> [[V3]]
471492
;
472493
%a0 = extractelement <4 x float> %a, i32 0
473494
%a1 = extractelement <4 x float> %a, i32 1

llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,7 @@ define <4 x i1> @ins_fcmp_ext_ext(<4 x float> %a, <4 x i1> %b) {
163163
; AVX-LABEL: @ins_fcmp_ext_ext(
164164
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 poison>
165165
; AVX-NEXT: [[TMP1:%.*]] = fcmp ugt <4 x float> [[A]], [[SHIFT]]
166-
; AVX-NEXT: [[A21:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
167-
; AVX-NEXT: [[R:%.*]] = insertelement <4 x i1> [[B:%.*]], i1 [[A21]], i32 2
166+
; AVX-NEXT: [[R:%.*]] = shufflevector <4 x i1> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
168167
; AVX-NEXT: ret <4 x i1> [[R]]
169168
;
170169
%a1 = extractelement <4 x float> %a, i32 1

llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -537,19 +537,32 @@ define <2 x float> @load_f32_insert_v2f32_asan(ptr align 16 dereferenceable(16)
537537

538538
declare ptr @getscaleptr()
539539
define void @PR47558_multiple_use_load(ptr nocapture nonnull %resultptr, ptr nocapture nonnull readonly %opptr) nofree nosync {
540-
; CHECK-LABEL: @PR47558_multiple_use_load(
541-
; CHECK-NEXT: [[SCALEPTR:%.*]] = tail call nonnull align 16 dereferenceable(64) ptr @getscaleptr()
542-
; CHECK-NEXT: [[OP:%.*]] = load <2 x float>, ptr [[OPPTR:%.*]], align 4
543-
; CHECK-NEXT: [[SCALE:%.*]] = load float, ptr [[SCALEPTR]], align 16
544-
; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x float> poison, float [[SCALE]], i32 0
545-
; CHECK-NEXT: [[T2:%.*]] = insertelement <2 x float> [[T1]], float [[SCALE]], i32 1
546-
; CHECK-NEXT: [[T3:%.*]] = fmul <2 x float> [[OP]], [[T2]]
547-
; CHECK-NEXT: [[T4:%.*]] = extractelement <2 x float> [[T3]], i32 0
548-
; CHECK-NEXT: [[RESULT0:%.*]] = insertelement <2 x float> poison, float [[T4]], i32 0
549-
; CHECK-NEXT: [[T5:%.*]] = extractelement <2 x float> [[T3]], i32 1
550-
; CHECK-NEXT: [[RESULT1:%.*]] = insertelement <2 x float> [[RESULT0]], float [[T5]], i32 1
551-
; CHECK-NEXT: store <2 x float> [[RESULT1]], ptr [[RESULTPTR:%.*]], align 8
552-
; CHECK-NEXT: ret void
540+
; SSE2-LABEL: @PR47558_multiple_use_load(
541+
; SSE2-NEXT: [[SCALEPTR:%.*]] = tail call nonnull align 16 dereferenceable(64) ptr @getscaleptr()
542+
; SSE2-NEXT: [[OP:%.*]] = load <2 x float>, ptr [[OPPTR:%.*]], align 4
543+
; SSE2-NEXT: [[SCALE:%.*]] = load float, ptr [[SCALEPTR]], align 16
544+
; SSE2-NEXT: [[T1:%.*]] = insertelement <2 x float> poison, float [[SCALE]], i32 0
545+
; SSE2-NEXT: [[T2:%.*]] = insertelement <2 x float> [[T1]], float [[SCALE]], i32 1
546+
; SSE2-NEXT: [[T3:%.*]] = fmul <2 x float> [[OP]], [[T2]]
547+
; SSE2-NEXT: [[T4:%.*]] = extractelement <2 x float> [[T3]], i32 0
548+
; SSE2-NEXT: [[RESULT0:%.*]] = insertelement <2 x float> poison, float [[T4]], i32 0
549+
; SSE2-NEXT: [[T5:%.*]] = extractelement <2 x float> [[T3]], i32 1
550+
; SSE2-NEXT: [[RESULT1:%.*]] = insertelement <2 x float> [[RESULT0]], float [[T5]], i32 1
551+
; SSE2-NEXT: store <2 x float> [[RESULT1]], ptr [[RESULTPTR:%.*]], align 8
552+
; SSE2-NEXT: ret void
553+
;
554+
; AVX2-LABEL: @PR47558_multiple_use_load(
555+
; AVX2-NEXT: [[SCALEPTR:%.*]] = tail call nonnull align 16 dereferenceable(64) ptr @getscaleptr()
556+
; AVX2-NEXT: [[OP:%.*]] = load <2 x float>, ptr [[OPPTR:%.*]], align 4
557+
; AVX2-NEXT: [[SCALE:%.*]] = load float, ptr [[SCALEPTR]], align 16
558+
; AVX2-NEXT: [[T1:%.*]] = insertelement <2 x float> poison, float [[SCALE]], i32 0
559+
; AVX2-NEXT: [[T2:%.*]] = insertelement <2 x float> [[T1]], float [[SCALE]], i32 1
560+
; AVX2-NEXT: [[T3:%.*]] = fmul <2 x float> [[OP]], [[T2]]
561+
; AVX2-NEXT: [[T4:%.*]] = extractelement <2 x float> [[T3]], i32 0
562+
; AVX2-NEXT: [[RESULT0:%.*]] = insertelement <2 x float> poison, float [[T4]], i32 0
563+
; AVX2-NEXT: [[RESULT1:%.*]] = shufflevector <2 x float> [[RESULT0]], <2 x float> [[T3]], <2 x i32> <i32 0, i32 3>
564+
; AVX2-NEXT: store <2 x float> [[RESULT1]], ptr [[RESULTPTR:%.*]], align 8
565+
; AVX2-NEXT: ret void
553566
;
554567
%scaleptr = tail call nonnull align 16 dereferenceable(64) ptr @getscaleptr()
555568
%op = load <2 x float>, ptr %opptr, align 4

0 commit comments

Comments
 (0)