-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[VectorCombine] support mismatching extract/insert indices for foldInsExtFNeg #126408
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d91acb8
ea149f5
05d486f
b15fc1f
1a7640c
c315e4e
6e08857
87f432c
61bc3e0
4ca25c8
7ab8bd8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -696,79 +696,86 @@ bool VectorCombine::foldExtractExtract(Instruction &I) { | |
| /// shuffle. | ||
| bool VectorCombine::foldInsExtFNeg(Instruction &I) { | ||
| // Match an insert (op (extract)) pattern. | ||
| Value *DestVec; | ||
| uint64_t Index; | ||
| Value *DstVec; | ||
| uint64_t ExtIdx, InsIdx; | ||
| Instruction *FNeg; | ||
| if (!match(&I, m_InsertElt(m_Value(DestVec), m_OneUse(m_Instruction(FNeg)), | ||
| m_ConstantInt(Index)))) | ||
| if (!match(&I, m_InsertElt(m_Value(DstVec), m_OneUse(m_Instruction(FNeg)), | ||
| m_ConstantInt(InsIdx)))) | ||
| return false; | ||
|
|
||
| // Note: This handles the canonical fneg instruction and "fsub -0.0, X". | ||
| Value *SrcVec; | ||
| Instruction *Extract; | ||
| if (!match(FNeg, m_FNeg(m_CombineAnd( | ||
| m_Instruction(Extract), | ||
| m_ExtractElt(m_Value(SrcVec), m_SpecificInt(Index)))))) | ||
| m_ExtractElt(m_Value(SrcVec), m_ConstantInt(ExtIdx)))))) | ||
| return false; | ||
|
|
||
| auto *VecTy = cast<FixedVectorType>(I.getType()); | ||
| auto *ScalarTy = VecTy->getScalarType(); | ||
| auto *DstVecTy = cast<FixedVectorType>(DstVec->getType()); | ||
| auto *DstVecScalarTy = DstVecTy->getScalarType(); | ||
| auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcVec->getType()); | ||
| if (!SrcVecTy || ScalarTy != SrcVecTy->getScalarType()) | ||
| if (!SrcVecTy || DstVecScalarTy != SrcVecTy->getScalarType()) | ||
| return false; | ||
|
|
||
| // Ignore bogus insert/extract index. | ||
| unsigned NumElts = VecTy->getNumElements(); | ||
| if (Index >= NumElts) | ||
| // Ignore if insert/extract index is out of bounds or destination vector has | ||
| // one element | ||
| unsigned NumDstElts = DstVecTy->getNumElements(); | ||
| unsigned NumSrcElts = SrcVecTy->getNumElements(); | ||
| if (ExtIdx > NumSrcElts || InsIdx >= NumDstElts || NumDstElts == 1) | ||
| return false; | ||
|
|
||
| // The negated element may be inserted into a different lane than the one it | ||
| // was extracted from. This is equivalent to a shuffle that keeps every lane | ||
| // of the destination vector except the insertion lane, which is taken from | ||
| // the negated source vector. | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Update the comment. |
||
| SmallVector<int> Mask(NumElts); | ||
| SmallVector<int> Mask(NumDstElts); | ||
| std::iota(Mask.begin(), Mask.end(), 0); | ||
| Mask[Index] = Index + NumElts; | ||
| Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts; | ||
| InstructionCost OldCost = | ||
| TTI.getArithmeticInstrCost(Instruction::FNeg, ScalarTy, CostKind) + | ||
| TTI.getVectorInstrCost(I, VecTy, CostKind, Index); | ||
| TTI.getArithmeticInstrCost(Instruction::FNeg, DstVecScalarTy, CostKind) + | ||
| TTI.getVectorInstrCost(I, DstVecTy, CostKind, InsIdx); | ||
|
|
||
| // If the extract has one use, it will be eliminated, so count it in the | ||
| // original cost. If it has more than one use, ignore the cost because it will | ||
| // be the same before/after. | ||
| if (Extract->hasOneUse()) | ||
| OldCost += TTI.getVectorInstrCost(*Extract, VecTy, CostKind, Index); | ||
| OldCost += TTI.getVectorInstrCost(*Extract, SrcVecTy, CostKind, ExtIdx); | ||
|
|
||
| InstructionCost NewCost = | ||
| TTI.getArithmeticInstrCost(Instruction::FNeg, VecTy, CostKind) + | ||
| TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, VecTy, VecTy, | ||
| Mask, CostKind); | ||
| TTI.getArithmeticInstrCost(Instruction::FNeg, SrcVecTy, CostKind) + | ||
| TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, DstVecTy, | ||
| DstVecTy, Mask, CostKind); | ||
|
|
||
| bool NeedLenChg = SrcVecTy->getNumElements() != NumElts; | ||
| bool NeedLenChg = SrcVecTy->getNumElements() != NumDstElts; | ||
| // If the lengths of the two vectors are not equal, | ||
| // we need to add a length-change vector. Add this cost. | ||
| SmallVector<int> SrcMask; | ||
| if (NeedLenChg) { | ||
| SrcMask.assign(NumElts, PoisonMaskElem); | ||
| SrcMask[Index] = Index; | ||
| SrcMask.assign(NumDstElts, PoisonMaskElem); | ||
| SrcMask[ExtIdx % NumDstElts] = ExtIdx; | ||
| NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, | ||
| VecTy, SrcVecTy, SrcMask, CostKind); | ||
| DstVecTy, SrcVecTy, SrcMask, CostKind); | ||
| } | ||
|
|
||
| LLVM_DEBUG(dbgs() << "Found an insertion of (extract)fneg : " << I | ||
| << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost | ||
| << "\n"); | ||
| if (NewCost > OldCost) | ||
| return false; | ||
|
|
||
| Value *NewShuf; | ||
| // insertelt DestVec, (fneg (extractelt SrcVec, Index)), Index | ||
| Value *NewShuf, *LenChgShuf = nullptr; | ||
| // insertelt DstVec, (fneg (extractelt SrcVec, ExtIdx)), InsIdx | ||
| Value *VecFNeg = Builder.CreateFNegFMF(SrcVec, FNeg); | ||
| if (NeedLenChg) { | ||
| // shuffle DestVec, (shuffle (fneg SrcVec), poison, SrcMask), Mask | ||
| Value *LenChgShuf = Builder.CreateShuffleVector(VecFNeg, SrcMask); | ||
| NewShuf = Builder.CreateShuffleVector(DestVec, LenChgShuf, Mask); | ||
| // shuffle DstVec, (shuffle (fneg SrcVec), poison, SrcMask), Mask | ||
| LenChgShuf = Builder.CreateShuffleVector(VecFNeg, SrcMask); | ||
| NewShuf = Builder.CreateShuffleVector(DstVec, LenChgShuf, Mask); | ||
| Worklist.pushValue(LenChgShuf); | ||
| } else { | ||
| // shuffle DestVec, (fneg SrcVec), Mask | ||
| NewShuf = Builder.CreateShuffleVector(DestVec, VecFNeg, Mask); | ||
| // shuffle DstVec, (fneg SrcVec), Mask | ||
| NewShuf = Builder.CreateShuffleVector(DstVec, VecFNeg, Mask); | ||
| } | ||
|
|
||
| Worklist.pushValue(VecFNeg); | ||
RKSimon marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| replaceValue(I, *NewShuf); | ||
| return true; | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -58,6 +58,19 @@ define <4 x float> @ext2_v2f32v4f32(<2 x float> %x, <4 x float> %y) { | |
| ret <4 x float> %r | ||
| } | ||
|
|
||
| define <2 x float> @ext2_v4f32v2f32(<4 x float> %x, <2 x float> %y) { | ||
| ; CHECK-LABEL: @ext2_v4f32v2f32( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = fneg <4 x float> [[X:%.*]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 poison, i32 3> | ||
| ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3> | ||
| ; CHECK-NEXT: ret <2 x float> [[R]] | ||
| ; | ||
| %e = extractelement <4 x float> %x, i32 3 | ||
| %n = fneg float %e | ||
| %r = insertelement <2 x float> %y, float %n, i32 1 | ||
| ret <2 x float> %r | ||
| } | ||
|
|
||
| ; Eliminating extract/insert is still profitable. Flags propagate. | ||
|
|
||
| define <2 x double> @ext1_v2f64(<2 x double> %x, <2 x double> %y) { | ||
|
|
@@ -73,24 +86,31 @@ define <2 x double> @ext1_v2f64(<2 x double> %x, <2 x double> %y) { | |
| } | ||
|
|
||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we have a test case where the dst vector is larger than the src vector?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'll update it ASAP. |
||
| define <4 x double> @ext1_v2f64v4f64(<2 x double> %x, <4 x double> %y) { | ||
| ; SSE-LABEL: @ext1_v2f64v4f64( | ||
| ; SSE-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1 | ||
| ; SSE-NEXT: [[N:%.*]] = fneg nsz double [[E]] | ||
| ; SSE-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 1 | ||
| ; SSE-NEXT: ret <4 x double> [[R]] | ||
| ; | ||
| ; AVX-LABEL: @ext1_v2f64v4f64( | ||
| ; AVX-NEXT: [[TMP1:%.*]] = fneg nsz <2 x double> [[X:%.*]] | ||
| ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison> | ||
| ; AVX-NEXT: [[R:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3> | ||
| ; AVX-NEXT: ret <4 x double> [[R]] | ||
| ; CHECK-LABEL: @ext1_v2f64v4f64( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = fneg nsz <2 x double> [[X:%.*]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison> | ||
| ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3> | ||
| ; CHECK-NEXT: ret <4 x double> [[R]] | ||
| ; | ||
| %e = extractelement <2 x double> %x, i32 1 | ||
| %n = fneg nsz double %e | ||
| %r = insertelement <4 x double> %y, double %n, i32 1 | ||
| ret <4 x double> %r | ||
| } | ||
|
|
||
| define <2 x double> @ext1_v4f64v2f64(<4 x double> %x, <2 x double> %y) { | ||
| ; CHECK-LABEL: @ext1_v4f64v2f64( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = fneg nsz <4 x double> [[X:%.*]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <2 x i32> <i32 poison, i32 3> | ||
| ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x double> [[Y:%.*]], <2 x double> [[TMP2]], <2 x i32> <i32 0, i32 3> | ||
| ; CHECK-NEXT: ret <2 x double> [[R]] | ||
| ; | ||
| %e = extractelement <4 x double> %x, i32 3 | ||
| %n = fneg nsz double %e | ||
| %r = insertelement <2 x double> %y, double %n, i32 1 | ||
| ret <2 x double> %r | ||
| } | ||
|
|
||
| define <8 x float> @ext7_v8f32(<8 x float> %x, <8 x float> %y) { | ||
| ; CHECK-LABEL: @ext7_v8f32( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[X:%.*]] | ||
|
|
@@ -105,9 +125,9 @@ define <8 x float> @ext7_v8f32(<8 x float> %x, <8 x float> %y) { | |
|
|
||
| define <8 x float> @ext7_v4f32v8f32(<4 x float> %x, <8 x float> %y) { | ||
| ; CHECK-LABEL: @ext7_v4f32v8f32( | ||
| ; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3 | ||
| ; CHECK-NEXT: [[N:%.*]] = fneg float [[E]] | ||
| ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 7 | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = fneg <4 x float> [[X:%.*]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> | ||
| ; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x float> [[Y:%.*]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 11> | ||
| ; CHECK-NEXT: ret <8 x float> [[R]] | ||
| ; | ||
| %e = extractelement <4 x float> %x, i32 3 | ||
|
|
@@ -116,6 +136,19 @@ define <8 x float> @ext7_v4f32v8f32(<4 x float> %x, <8 x float> %y) { | |
| ret <8 x float> %r | ||
| } | ||
|
|
||
| define <4 x float> @ext7_v8f32v4f32(<8 x float> %x, <4 x float> %y) { | ||
| ; CHECK-LABEL: @ext7_v8f32v4f32( | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[X:%.*]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 7> | ||
| ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7> | ||
| ; CHECK-NEXT: ret <4 x float> [[R]] | ||
| ; | ||
| %e = extractelement <8 x float> %x, i32 7 | ||
| %n = fneg float %e | ||
| %r = insertelement <4 x float> %y, float %n, i32 3 | ||
| ret <4 x float> %r | ||
| } | ||
|
|
||
| ; Same as above with an extra use of the extracted element. | ||
|
|
||
| define <8 x float> @ext7_v8f32_use1(<8 x float> %x, <8 x float> %y) { | ||
|
|
@@ -141,12 +174,20 @@ define <8 x float> @ext7_v8f32_use1(<8 x float> %x, <8 x float> %y) { | |
| } | ||
|
|
||
| define <8 x float> @ext7_v4f32v8f32_use1(<4 x float> %x, <8 x float> %y) { | ||
| ; CHECK-LABEL: @ext7_v4f32v8f32_use1( | ||
| ; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3 | ||
| ; CHECK-NEXT: call void @use(float [[E]]) | ||
| ; CHECK-NEXT: [[N:%.*]] = fneg float [[E]] | ||
| ; CHECK-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 3 | ||
| ; CHECK-NEXT: ret <8 x float> [[R]] | ||
| ; SSE-LABEL: @ext7_v4f32v8f32_use1( | ||
| ; SSE-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3 | ||
| ; SSE-NEXT: call void @use(float [[E]]) | ||
| ; SSE-NEXT: [[TMP1:%.*]] = fneg <4 x float> [[X]] | ||
| ; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> | ||
| ; SSE-NEXT: [[R:%.*]] = shufflevector <8 x float> [[Y:%.*]], <8 x float> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 4, i32 5, i32 6, i32 7> | ||
| ; SSE-NEXT: ret <8 x float> [[R]] | ||
| ; | ||
| ; AVX-LABEL: @ext7_v4f32v8f32_use1( | ||
| ; AVX-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3 | ||
| ; AVX-NEXT: call void @use(float [[E]]) | ||
| ; AVX-NEXT: [[N:%.*]] = fneg float [[E]] | ||
| ; AVX-NEXT: [[R:%.*]] = insertelement <8 x float> [[Y:%.*]], float [[N]], i32 3 | ||
| ; AVX-NEXT: ret <8 x float> [[R]] | ||
| ; | ||
| %e = extractelement <4 x float> %x, i32 3 | ||
| call void @use(float %e) | ||
|
|
@@ -155,6 +196,29 @@ define <8 x float> @ext7_v4f32v8f32_use1(<4 x float> %x, <8 x float> %y) { | |
| ret <8 x float> %r | ||
| } | ||
|
|
||
| define <4 x float> @ext7_v8f32v4f32_use1(<8 x float> %x, <4 x float> %y) { | ||
| ; SSE-LABEL: @ext7_v8f32v4f32_use1( | ||
| ; SSE-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 7 | ||
| ; SSE-NEXT: call void @use(float [[E]]) | ||
| ; SSE-NEXT: [[TMP1:%.*]] = fneg <8 x float> [[X]] | ||
| ; SSE-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 7> | ||
| ; SSE-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7> | ||
| ; SSE-NEXT: ret <4 x float> [[R]] | ||
| ; | ||
| ; AVX-LABEL: @ext7_v8f32v4f32_use1( | ||
| ; AVX-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 7 | ||
| ; AVX-NEXT: call void @use(float [[E]]) | ||
| ; AVX-NEXT: [[N:%.*]] = fneg float [[E]] | ||
| ; AVX-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 3 | ||
| ; AVX-NEXT: ret <4 x float> [[R]] | ||
| ; | ||
| %e = extractelement <8 x float> %x, i32 7 | ||
| call void @use(float %e) | ||
| %n = fneg float %e | ||
| %r = insertelement <4 x float> %y, float %n, i32 3 | ||
| ret <4 x float> %r | ||
| } | ||
|
|
||
| ; Negative test - the transform is likely not profitable if the fneg has another use. | ||
|
|
||
| define <8 x float> @ext7_v8f32_use2(<8 x float> %x, <8 x float> %y) { | ||
|
|
@@ -187,6 +251,21 @@ define <8 x float> @ext7_v4f32v8f32_use2(<4 x float> %x, <8 x float> %y) { | |
| ret <8 x float> %r | ||
| } | ||
|
|
||
| define <4 x float> @ext7_v8f32v4f32_use2(<8 x float> %x, <4 x float> %y) { | ||
| ; CHECK-LABEL: @ext7_v8f32v4f32_use2( | ||
| ; CHECK-NEXT: [[E:%.*]] = extractelement <8 x float> [[X:%.*]], i32 7 | ||
| ; CHECK-NEXT: [[N:%.*]] = fneg float [[E]] | ||
| ; CHECK-NEXT: call void @use(float [[N]]) | ||
| ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 3 | ||
| ; CHECK-NEXT: ret <4 x float> [[R]] | ||
| ; | ||
| %e = extractelement <8 x float> %x, i32 7 | ||
| %n = fneg float %e | ||
| call void @use(float %n) | ||
| %r = insertelement <4 x float> %y, float %n, i32 3 | ||
| ret <4 x float> %r | ||
| } | ||
|
|
||
| ; Negative test - can't convert variable index to a shuffle. | ||
|
|
||
| define <2 x double> @ext_index_var_v2f64(<2 x double> %x, <2 x double> %y, i32 %index) { | ||
|
|
@@ -215,14 +294,10 @@ define <4 x double> @ext_index_var_v2f64v4f64(<2 x double> %x, <4 x double> %y, | |
| ret <4 x double> %r | ||
| } | ||
|
|
||
| ; Negative test - require same extract/insert index for simple shuffle. | ||
| ; TODO: We could handle this by adjusting the cost calculation. | ||
|
|
||
| define <2 x double> @ext1_v2f64_ins0(<2 x double> %x, <2 x double> %y) { | ||
| ; CHECK-LABEL: @ext1_v2f64_ins0( | ||
| ; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1 | ||
| ; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]] | ||
| ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x double> [[Y:%.*]], double [[N]], i32 0 | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = fneg nsz <2 x double> [[X:%.*]] | ||
| ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x double> [[Y:%.*]], <2 x double> [[TMP1]], <2 x i32> <i32 3, i32 1> | ||
| ; CHECK-NEXT: ret <2 x double> [[R]] | ||
| ; | ||
| %e = extractelement <2 x double> %x, i32 1 | ||
|
|
@@ -231,12 +306,11 @@ define <2 x double> @ext1_v2f64_ins0(<2 x double> %x, <2 x double> %y) { | |
| ret <2 x double> %r | ||
| } | ||
|
|
||
| ; Extract from an index greater than the vector width of the destination. | ||
| define <2 x double> @ext3_v4f64v2f64(<4 x double> %x, <2 x double> %y) { | ||
| ; CHECK-LABEL: @ext3_v4f64v2f64( | ||
| ; CHECK-NEXT: [[E:%.*]] = extractelement <4 x double> [[X:%.*]], i32 3 | ||
| ; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]] | ||
| ; CHECK-NEXT: [[R:%.*]] = insertelement <2 x double> [[Y:%.*]], double [[N]], i32 1 | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = fneg nsz <4 x double> [[X:%.*]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <2 x i32> <i32 poison, i32 3> | ||
| ; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x double> [[Y:%.*]], <2 x double> [[TMP2]], <2 x i32> <i32 0, i32 3> | ||
| ; CHECK-NEXT: ret <2 x double> [[R]] | ||
| ; | ||
| %e = extractelement <4 x double> %x, i32 3 | ||
|
|
@@ -246,11 +320,17 @@ define <2 x double> @ext3_v4f64v2f64(<4 x double> %x, <2 x double> %y) { | |
| } | ||
|
|
||
| define <4 x double> @ext1_v2f64v4f64_ins0(<2 x double> %x, <4 x double> %y) { | ||
| ; CHECK-LABEL: @ext1_v2f64v4f64_ins0( | ||
| ; CHECK-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1 | ||
| ; CHECK-NEXT: [[N:%.*]] = fneg nsz double [[E]] | ||
| ; CHECK-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 0 | ||
| ; CHECK-NEXT: ret <4 x double> [[R]] | ||
| ; SSE-LABEL: @ext1_v2f64v4f64_ins0( | ||
| ; SSE-NEXT: [[TMP1:%.*]] = fneg nsz <2 x double> [[X:%.*]] | ||
| ; SSE-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison> | ||
| ; SSE-NEXT: [[R:%.*]] = shufflevector <4 x double> [[Y:%.*]], <4 x double> [[TMP2]], <4 x i32> <i32 5, i32 1, i32 2, i32 3> | ||
| ; SSE-NEXT: ret <4 x double> [[R]] | ||
| ; | ||
| ; AVX-LABEL: @ext1_v2f64v4f64_ins0( | ||
| ; AVX-NEXT: [[E:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1 | ||
| ; AVX-NEXT: [[N:%.*]] = fneg nsz double [[E]] | ||
| ; AVX-NEXT: [[R:%.*]] = insertelement <4 x double> [[Y:%.*]], double [[N]], i32 0 | ||
| ; AVX-NEXT: ret <4 x double> [[R]] | ||
| ; | ||
| %e = extractelement <2 x double> %x, i32 1 | ||
| %n = fneg nsz double %e | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be `ExtIdx >= NumSrcElts`?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When the vector has a single element, the only extractable index is 0, so an index equal to the element count may already exceed the bounds of the source vector.