From 6a06131973ea10e1407a3662567fed410875f405 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 11 Oct 2024 20:54:25 +0000 Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?= =?UTF-8?q?l=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.5 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 6 ++- .../X86/insertelements-with-reused-indices.ll | 14 +++--- .../Transforms/SLPVectorizer/X86/partail.ll | 30 ++++++------- .../Transforms/SLPVectorizer/X86/pr27163.ll | 2 +- .../SLPVectorizer/X86/reorder_repeated_ops.ll | 44 ++++++------------- .../Transforms/SLPVectorizer/X86/slp-throttle.ll | 4 +- .../SLPVectorizer/scalarization-overhead.ll | 13 +++--- 7 files changed, 47 insertions(+), 66 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9826a8e8f8c67..29ef77db58870 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11748,11 +11748,13 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { if (EphValues.count(EU.User)) continue; - // Used in unreachable blocks or in landing pads (rarely executed). + // Used in unreachable blocks, in EH pads (rarely executed), or in blocks + // terminated with an unreachable instruction. if (BasicBlock *UserParent = EU.User ? cast(EU.User)->getParent() : nullptr; UserParent && - (!DT->isReachableFromEntry(UserParent) || UserParent->isLandingPad())) + (!DT->isReachableFromEntry(UserParent) || UserParent->isEHPad() || + isa_and_present(UserParent->getTerminator()))) continue; // We only add extract cost once for the same scalar. 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll b/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll index e46c3b94383ff..c154f5de3c782 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insertelements-with-reused-indices.ll @@ -4,12 +4,16 @@ define void @test() { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> undef, float 0.000000e+00, i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float 0.000000e+00, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 0.000000e+00, i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 0.000000e+00, i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = fsub float 0.000000e+00, 0.000000e+00 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = fsub float 0.000000e+00, 0.000000e+00 +; CHECK-NEXT: [[TMP10:%.*]] = fmul float 0.000000e+00, [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP2]], float 0.000000e+00, i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = fsub float 0.000000e+00, [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = fsub float [[TMP7]], [[TMP10]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> zeroinitializer, float [[TMP8]], i32 0 ; CHECK-NEXT: unreachable ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll index 0b9ed47ce0f17..154b03c1107ec 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll 
@@ -13,28 +13,26 @@ define void @get_block(i32 %y_pos) local_unnamed_addr #0 { ; CHECK: if.end: ; CHECK-NEXT: [[SUB14:%.*]] = sub nsw i32 [[Y_POS:%.*]], undef ; CHECK-NEXT: [[SHR15:%.*]] = ashr i32 [[SUB14]], 2 -; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp sgt i32 [[SHR15]], 0 -; CHECK-NEXT: [[COND_I_I:%.*]] = select i1 [[CMP_I_I]], i32 [[SHR15]], i32 0 -; CHECK-NEXT: [[CMP_I4_I:%.*]] = icmp slt i32 [[COND_I_I]], undef -; CHECK-NEXT: [[COND_I5_I:%.*]] = select i1 [[CMP_I4_I]], i32 [[COND_I_I]], i32 undef +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[SUB14]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[SHR15]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 undef, i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 undef, i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 undef, i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP6]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[TMP7]], undef +; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP7]], <4 x i32> undef +; CHECK-NEXT: [[COND_I5_I:%.*]] = extractelement <4 x i32> [[TMP9]], i32 3 ; CHECK-NEXT: [[IDXPROM30:%.*]] = sext i32 [[COND_I5_I]] to i64 ; CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[IDXPROM30]] -; CHECK-NEXT: [[CMP_I_I_1:%.*]] = icmp sgt i32 [[SUB14]], -1 -; CHECK-NEXT: [[COND_I_I_1:%.*]] = select i1 [[CMP_I_I_1]], i32 undef, i32 0 -; CHECK-NEXT: [[CMP_I4_I_1:%.*]] = icmp slt i32 [[COND_I_I_1]], undef -; CHECK-NEXT: [[COND_I5_I_1:%.*]] = select i1 [[CMP_I4_I_1]], i32 [[COND_I_I_1]], i32 undef +; CHECK-NEXT: [[COND_I5_I_1:%.*]] = extractelement <4 x i32> [[TMP9]], i32 2 ; CHECK-NEXT: [[IDXPROM30_1:%.*]] = sext i32 [[COND_I5_I_1]] 
to i64 ; CHECK-NEXT: [[ARRAYIDX31_1:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[IDXPROM30_1]] -; CHECK-NEXT: [[CMP_I_I_2:%.*]] = icmp sgt i32 [[SUB14]], -5 -; CHECK-NEXT: [[COND_I_I_2:%.*]] = select i1 [[CMP_I_I_2]], i32 undef, i32 0 -; CHECK-NEXT: [[CMP_I4_I_2:%.*]] = icmp slt i32 [[COND_I_I_2]], undef -; CHECK-NEXT: [[COND_I5_I_2:%.*]] = select i1 [[CMP_I4_I_2]], i32 [[COND_I_I_2]], i32 undef +; CHECK-NEXT: [[COND_I5_I_2:%.*]] = extractelement <4 x i32> [[TMP9]], i32 1 ; CHECK-NEXT: [[IDXPROM30_2:%.*]] = sext i32 [[COND_I5_I_2]] to i64 ; CHECK-NEXT: [[ARRAYIDX31_2:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[IDXPROM30_2]] -; CHECK-NEXT: [[CMP_I_I_3:%.*]] = icmp sgt i32 [[SUB14]], -9 -; CHECK-NEXT: [[COND_I_I_3:%.*]] = select i1 [[CMP_I_I_3]], i32 undef, i32 0 -; CHECK-NEXT: [[CMP_I4_I_3:%.*]] = icmp slt i32 [[COND_I_I_3]], undef -; CHECK-NEXT: [[COND_I5_I_3:%.*]] = select i1 [[CMP_I4_I_3]], i32 [[COND_I_I_3]], i32 undef +; CHECK-NEXT: [[COND_I5_I_3:%.*]] = extractelement <4 x i32> [[TMP9]], i32 0 ; CHECK-NEXT: [[IDXPROM30_3:%.*]] = sext i32 [[COND_I5_I_3]] to i64 ; CHECK-NEXT: [[ARRAYIDX31_3:%.*]] = getelementptr inbounds ptr, ptr undef, i64 [[IDXPROM30_3]] ; CHECK-NEXT: unreachable diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll index 9979bb9170d48..eed772b0dd104 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr27163.ll @@ -9,7 +9,7 @@ define void @test1(ptr %p) personality ptr @__CxxFrameHandler3 { ; CHECK-LABEL: @test1( ; CHECK-NEXT: invoke.cont: ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[P:%.*]], align 8 -; CHECK-NEXT: [[LOAD1:%.*]] = load i64, ptr [[P]], align 8 +; CHECK-NEXT: [[LOAD1:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[P]], align 8 ; CHECK-NEXT: invoke void @throw() ; CHECK-NEXT: to label [[UNREACHABLE:%.*]] unwind label [[CATCH_DISPATCH:%.*]] diff 
--git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll index 590e5a67bd4ce..95006071790ca 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll @@ -11,41 +11,23 @@ define void @hoge() { ; CHECK-NEXT: ret void ; CHECK: bb2: ; CHECK-NEXT: [[T:%.*]] = select i1 undef, i16 undef, i16 15 -; CHECK-NEXT: [[T3:%.*]] = sext i16 undef to i32 -; CHECK-NEXT: [[T4:%.*]] = sext i16 [[T]] to i32 -; CHECK-NEXT: [[T5:%.*]] = sub nsw i32 undef, [[T4]] -; CHECK-NEXT: [[T6:%.*]] = sub i32 [[T5]], undef -; CHECK-NEXT: [[T7:%.*]] = sub nsw i32 63, [[T3]] -; CHECK-NEXT: [[T8:%.*]] = sub i32 [[T7]], undef -; CHECK-NEXT: [[T9:%.*]] = add i32 [[T8]], undef -; CHECK-NEXT: [[T10:%.*]] = add nsw i32 [[T6]], 15 -; CHECK-NEXT: [[T11:%.*]] = icmp sgt i32 [[T9]], [[T10]] -; CHECK-NEXT: [[T12:%.*]] = select i1 [[T11]], i32 [[T9]], i32 [[T10]] -; CHECK-NEXT: [[T13:%.*]] = add nsw i32 [[T6]], 31 -; CHECK-NEXT: [[T14:%.*]] = icmp sgt i32 [[T12]], [[T13]] -; CHECK-NEXT: [[T15:%.*]] = select i1 [[T14]], i32 [[T12]], i32 [[T13]] -; CHECK-NEXT: [[T16:%.*]] = add nsw i32 [[T6]], 47 -; CHECK-NEXT: [[T17:%.*]] = icmp sgt i32 [[T15]], [[T16]] -; CHECK-NEXT: [[T18:%.*]] = select i1 [[T17]], i32 [[T15]], i32 [[T16]] +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> , i16 [[T]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i16> [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <2 x i32> , [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], undef +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], +; CHECK-NEXT: [[T18:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[T19:%.*]] = select i1 undef, i32 [[T18]], i32 undef ; CHECK-NEXT: [[T20:%.*]] = icmp sgt i32 [[T19]], 63 -; CHECK-NEXT: 
[[T21:%.*]] = sub nsw i32 undef, [[T3]] -; CHECK-NEXT: [[T22:%.*]] = sub i32 [[T21]], undef -; CHECK-NEXT: [[T23:%.*]] = sub nsw i32 undef, [[T4]] -; CHECK-NEXT: [[T24:%.*]] = sub i32 [[T23]], undef -; CHECK-NEXT: [[T25:%.*]] = add nsw i32 [[T24]], -49 -; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T22]], -33 -; CHECK-NEXT: [[T35:%.*]] = add nsw i32 [[T24]], -33 -; CHECK-NEXT: [[T40:%.*]] = add nsw i32 [[T22]], -17 +; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP9]], +; CHECK-NEXT: [[T25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP10]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = icmp slt i32 undef, [[T25]] ; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[OP_RDX]], i32 undef, i32 [[T25]] -; CHECK-NEXT: [[OP_RDX2:%.*]] = icmp slt i32 [[T30]], [[T35]] -; CHECK-NEXT: [[OP_RDX3:%.*]] = select i1 [[OP_RDX2]], i32 [[T30]], i32 [[T35]] -; CHECK-NEXT: [[OP_RDX4:%.*]] = icmp slt i32 [[OP_RDX1]], [[OP_RDX3]] -; CHECK-NEXT: [[OP_RDX5:%.*]] = select i1 [[OP_RDX4]], i32 [[OP_RDX1]], i32 [[OP_RDX3]] -; CHECK-NEXT: [[OP_RDX6:%.*]] = icmp slt i32 [[OP_RDX5]], [[T40]] -; CHECK-NEXT: [[OP_RDX7:%.*]] = select i1 [[OP_RDX6]], i32 [[OP_RDX5]], i32 [[T40]] -; CHECK-NEXT: [[T45:%.*]] = icmp sgt i32 undef, [[OP_RDX7]] +; CHECK-NEXT: [[T45:%.*]] = icmp sgt i32 undef, [[OP_RDX1]] ; CHECK-NEXT: unreachable ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll b/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll index d9496a3e3e343..f7935c7af9631 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/slp-throttle.ll @@ -5,11 +5,9 @@ define dso_local void @rftbsub(ptr %a) local_unnamed_addr #0 { ; CHECK-LABEL: @rftbsub( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds 
double, ptr [[A:%.*]], i64 2 -; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 2, 1 -; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[TMP0]] ; CHECK-NEXT: [[SUB22:%.*]] = fsub double undef, undef -; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[TMP2]], i32 1 ; CHECK-NEXT: [[ADD16:%.*]] = fadd double [[TMP1]], undef ; CHECK-NEXT: [[MUL18:%.*]] = fmul double undef, [[ADD16]] ; CHECK-NEXT: [[ADD19:%.*]] = fadd double undef, [[MUL18]] diff --git a/llvm/test/Transforms/SLPVectorizer/scalarization-overhead.ll b/llvm/test/Transforms/SLPVectorizer/scalarization-overhead.ll index 9f6b285f1ab90..372202bd0cbd6 100644 --- a/llvm/test/Transforms/SLPVectorizer/scalarization-overhead.ll +++ b/llvm/test/Transforms/SLPVectorizer/scalarization-overhead.ll @@ -7,9 +7,8 @@ define i16 @D134605() { ; CHECK-LABEL: @D134605( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX81:%.*]] = getelementptr inbounds [32 x i16], ptr poison, i16 0, i16 3 -; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX81]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr poison, align 1 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 ; CHECK-NEXT: [[REASS_ADD:%.*]] = add i16 poison, [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = mul i16 [[TMP2]], 2 @@ -45,12 +44,10 @@ declare i16 @check_i16(i16, i16, i16) define void @PR58054() { ; CHECK-LABEL: @PR58054( -; CHECK-NEXT: [[VAL:%.*]] = add i64 poison, poison -; CHECK-NEXT: [[VAL2:%.*]] = add i64 poison, poison -; CHECK-NEXT: [[VAL3:%.*]] = mul i64 [[VAL2]], [[VAL]] -; CHECK-NEXT: [[VAL4:%.*]] = mul i64 [[VAL3]], [[VAL2]] -; CHECK-NEXT: [[VAL5:%.*]] = mul i64 [[VAL4]], [[VAL2]] -; CHECK-NEXT: [[VAL7:%.*]] = add i64 [[VAL]], [[VAL5]] +; CHECK-NEXT: [[VAL3:%.*]] = mul i64 
poison, poison +; CHECK-NEXT: [[VAL4:%.*]] = mul i64 [[VAL3]], poison +; CHECK-NEXT: [[VAL5:%.*]] = mul i64 [[VAL4]], poison +; CHECK-NEXT: [[VAL7:%.*]] = add i64 poison, [[VAL5]] ; CHECK-NEXT: [[VAL8:%.*]] = sitofp i64 [[VAL7]] to double ; CHECK-NEXT: call void @wibble(i32 poison, double [[VAL8]], i64 poison) ; CHECK-NEXT: unreachable