Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions llvm/lib/Transforms/Vectorize/VectorCombine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2282,6 +2282,17 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
if (!Match0 && !Match1)
return false;

// If the outer shuffle is a permute, then create a fake inner all-poison
// shuffle. This is easier than accounting for length-changing shuffles below.
SmallVector<int, 16> PoisonMask1;
if (!Match1 && isa<PoisonValue>(OuterV1)) {
X1 = X0;
Y1 = Y0;
PoisonMask1.append(InnerMask0.size(), PoisonMaskElem);
InnerMask1 = PoisonMask1;
Match1 = true; // fake match
}

X0 = Match0 ? X0 : OuterV0;
Y0 = Match0 ? Y0 : OuterV0;
X1 = Match1 ? X1 : OuterV1;
Expand Down Expand Up @@ -2356,11 +2367,11 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
// Try to merge the shuffles if the new shuffle is not costly.
InstructionCost InnerCost0 = 0;
if (Match0)
InnerCost0 = TTI.getInstructionCost(cast<Instruction>(OuterV0), CostKind);
InnerCost0 = TTI.getInstructionCost(cast<User>(OuterV0), CostKind);

InstructionCost InnerCost1 = 0;
if (Match1)
InnerCost1 = TTI.getInstructionCost(cast<Instruction>(OuterV1), CostKind);
InnerCost1 = TTI.getInstructionCost(cast<User>(OuterV1), CostKind);

InstructionCost OuterCost = TTI.getInstructionCost(&I, CostKind);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -262,10 +262,8 @@ define <8 x half> @splatandidentity(<8 x half> %a, <8 x half> %b) {

define <8 x half> @splattwice(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: @splattwice(
; CHECK-NEXT: [[AS:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[AS]], <4 x half> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[BS]], <4 x half> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[R:%.*]] = fadd <8 x half> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <8 x half> [[R]]
;
Expand Down
36 changes: 16 additions & 20 deletions llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
; SSE-NEXT: ret <4 x double> [[INS]]
;
; AVX-LABEL: @src_ins2_v4f64_ext0_v2f64(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
; AVX-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>
; AVX-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 0
Expand All @@ -48,8 +47,7 @@ define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b)
; SSE-NEXT: ret <4 x double> [[INS]]
;
; AVX-LABEL: @src_ins3_v4f64_ext0_v2f64(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
; AVX-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>
; AVX-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 0
Expand Down Expand Up @@ -86,8 +84,7 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
; SSE-NEXT: ret <4 x double> [[INS]]
;
; AVX-LABEL: @src_ins2_v4f64_ext1_v2f64(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 poison>
; AVX-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 poison>
; AVX-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 1
Expand All @@ -96,10 +93,14 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
}

define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 1>
; CHECK-NEXT: ret <4 x double> [[INS]]
; SSE-LABEL: @src_ins3_v4f64_ext1_v2f64(
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 1, i32 poison, i32 poison>
; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 1>
; SSE-NEXT: ret <4 x double> [[INS]]
;
; AVX-LABEL: @src_ins3_v4f64_ext1_v2f64(
; AVX-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 1>
; AVX-NEXT: ret <4 x double> [[INS]]
;
%ext = extractelement <2 x double> %b, i32 1
%ins = insertelement <4 x double> poison, double %ext, i32 3
Expand All @@ -119,8 +120,7 @@ define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)

define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 1>
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 1, i32 poison>
; CHECK-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 1
Expand Down Expand Up @@ -152,8 +152,7 @@ define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b)

define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 0, i32 poison>
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 0>
; CHECK-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 0
Expand All @@ -164,8 +163,7 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b)
define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 1>
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 1>
; CHECK-NEXT: ret <2 x double> [[INS]]
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%ext = extractelement <4 x double> %b, i32 1
%ins = insertelement <2 x double> poison, double %ext, i32 1
Expand All @@ -174,8 +172,7 @@ define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b)

define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) {
; SSE-LABEL: @src_ins1_v2f64_ext2_v4f64(
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 2, i32 poison>
; SSE-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 2>
; SSE-NEXT: ret <2 x double> [[INS]]
;
; AVX-LABEL: @src_ins1_v2f64_ext2_v4f64(
Expand All @@ -190,8 +187,7 @@ define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b)

define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) {
; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 3, i32 poison>
; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 poison, i32 0>
; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> <i32 poison, i32 3>
; CHECK-NEXT: ret <2 x double> [[INS]]
;
%ext = extractelement <4 x double> %b, i32 3
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll
Original file line number Diff line number Diff line change
Expand Up @@ -578,8 +578,7 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(ptr align 16 derefer
; CHECK-NEXT: [[L:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4
; CHECK-NEXT: store <1 x i32> [[L]], ptr [[STORE_PTR:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <1 x i32> [[L]], <1 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: ret <8 x i32> [[R]]
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
;
%l = load <1 x i32>, ptr %p, align 4
store <1 x i32> %l, ptr %store_ptr
Expand Down
Loading