diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 595aed2cab182..3a1fa2e7bbbdb 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11857,11 +11857,13 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { if (EphValues.count(EU.User)) continue; - // Used in unreachable blocks or in landing pads (rarely executed). + // Used in unreachable blocks, in EH pads (rarely executed), or in blocks + // terminated with an unreachable instruction. if (BasicBlock *UserParent = EU.User ? cast(EU.User)->getParent() : nullptr; UserParent && - (!DT->isReachableFromEntry(UserParent) || UserParent->isLandingPad())) + (!DT->isReachableFromEntry(UserParent) || UserParent->isEHPad() || + isa_and_present(UserParent->getTerminator()))) continue; // We only add extract cost once for the same scalar. diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll index 9979bb9170d48..eed772b0dd104 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll @@ -9,7 +9,7 @@ define void @test1(ptr %p) personality ptr @__CxxFrameHandler3 { ; CHECK-LABEL: @test1( ; CHECK-NEXT: invoke.cont: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[P:%.*]], align 8 -; CHECK-NEXT: [[LOAD1:%.*]] = load i64, ptr [[P]], align 8 +; CHECK-NEXT: [[LOAD1:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[P]], align 8 ; CHECK-NEXT: invoke void @throw() ; CHECK-NEXT: to label [[UNREACHABLE:%.*]] unwind label [[CATCH_DISPATCH:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll index 590e5a67bd4ce..95006071790ca 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll @@ -11,41 +11,23 @@ define void @hoge() { ; CHECK-NEXT: ret void ; CHECK: bb2: ; CHECK-NEXT: [[T:%.*]] = select i1 undef, i16 undef, i16 15 -; CHECK-NEXT: [[T3:%.*]] = sext i16 undef to i32 -; CHECK-NEXT: [[T4:%.*]] = sext i16 [[T]] to i32 -; CHECK-NEXT: [[T5:%.*]] = sub nsw i32 undef, [[T4]] -; CHECK-NEXT: [[T6:%.*]] = sub i32 [[T5]], undef -; CHECK-NEXT: [[T7:%.*]] = sub nsw i32 63, [[T3]] -; CHECK-NEXT: [[T8:%.*]] = sub i32 [[T7]], undef -; CHECK-NEXT: [[T9:%.*]] = add i32 [[T8]], undef -; CHECK-NEXT: [[T10:%.*]] = add nsw i32 [[T6]], 15 -; CHECK-NEXT: [[T11:%.*]] = icmp sgt i32 [[T9]], [[T10]] -; CHECK-NEXT: [[T12:%.*]] = select i1 [[T11]], i32 [[T9]], i32 [[T10]] -; CHECK-NEXT: [[T13:%.*]] = add nsw i32 [[T6]], 31 -; CHECK-NEXT: [[T14:%.*]] = icmp sgt i32 [[T12]], [[T13]] -; CHECK-NEXT: [[T15:%.*]] = select i1 [[T14]], i32 [[T12]], i32 [[T13]] -; CHECK-NEXT: [[T16:%.*]] = add nsw i32 [[T6]], 47 -; CHECK-NEXT: [[T17:%.*]] = icmp sgt i32 [[T15]], [[T16]] -; CHECK-NEXT: [[T18:%.*]] = select i1 [[T17]], i32 [[T15]], i32 [[T16]] +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> , i16 [[T]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i16> [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], undef +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], +; CHECK-NEXT: [[T18:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[T19:%.*]] = select i1 undef, i32 [[T18]], i32 undef ; CHECK-NEXT: [[T20:%.*]] = icmp sgt i32 [[T19]], 63 -; CHECK-NEXT: [[T21:%.*]] = sub nsw i32 undef, [[T3]] -; CHECK-NEXT: [[T22:%.*]] = sub i32 [[T21]], undef -; CHECK-NEXT: [[T23:%.*]] = sub nsw i32 undef, [[T4]] -; CHECK-NEXT: [[T24:%.*]] = sub i32 [[T23]], undef -; CHECK-NEXT: [[T25:%.*]] = add nsw i32 [[T24]], -49 -; 
CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T22]], -33 -; CHECK-NEXT: [[T35:%.*]] = add nsw i32 [[T24]], -33 -; CHECK-NEXT: [[T40:%.*]] = add nsw i32 [[T22]], -17 +; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP9]], +; CHECK-NEXT: [[T25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP10]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = icmp slt i32 undef, [[T25]] ; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 undef, i32 [[T25]] -; CHECK-NEXT: [[OP_RDX2:%.*]] = icmp slt i32 [[T30]], [[T35]] -; CHECK-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[T30]], i32 [[T35]] -; CHECK-NEXT: [[OP_RDX4:%.*]] = icmp slt i32 [[OP_RDX1]], [[OP_RDX3]] -; CHECK-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]] -; CHECK-NEXT: [[OP_RDX6:%.*]] = icmp slt i32 [[OP_RDX5]], [[T40]] -; CHECK-NEXT: [[OP_RDX7:%.*]] = select i1 [[OP_RDX6]], i32 [[OP_RDX5]], i32 [[T40]] -; CHECK-NEXT: [[T45:%.*]] = icmp sgt i32 undef, [[OP_RDX7]] +; CHECK-NEXT: [[T45:%.*]] = icmp sgt i32 undef, [[OP_RDX1]] ; CHECK-NEXT: unreachable ; bb: