From 9c43e6ce79afc395a5a6fce842ee811b3bdcc73e Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Fri, 22 Nov 2024 01:42:42 -0800 Subject: [PATCH 1/3] [SLP][REVEC] Pre-commit test. --- llvm/test/Transforms/SLPVectorizer/revec.ll | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll index aec81086105d6..208b042c70353 100644 --- a/llvm/test/Transforms/SLPVectorizer/revec.ll +++ b/llvm/test/Transforms/SLPVectorizer/revec.ll @@ -409,3 +409,23 @@ entry: %23 = fcmp ogt <8 x float> zeroinitializer, %19 ret void } + +define void @test13(<8 x i32> %0, ptr %out0, ptr %out1, ptr %out2) { +entry: + %1 = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> + %2 = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> + %3 = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> + %4 = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> + br label %for.end.loopexit + +for.end.loopexit: + %phi0 = phi <4 x i32> [ %1, %entry ] + %phi1 = phi <4 x i32> [ %2, %entry ] + %phi2 = phi <4 x i32> [ %3, %entry ] + %phi3 = phi <4 x i32> [ %4, %entry ] + %or0 = or <4 x i32> %phi1, zeroinitializer + store <4 x i32> %or0, ptr %out0, align 4 + store <4 x i32> %1, ptr %out1, align 4 + store <4 x i32> %4, ptr %out2, align 4 + ret void +} From a900cefadfcc9b1b62c82a8681c5957288c1f601 Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Fri, 22 Nov 2024 01:45:54 -0800 Subject: [PATCH 2/3] [SLP][REVEC] Make reorderTopToBottom support ShuffleVectorInst. We don't want reorderTopToBottom to reorder ShuffleVectorInst (because ShuffleVectorInst currently supports only a limited set of patterns). Either we make ShuffleVectorInst support more patterns, or we let ReorderIndices reorder the result of the vectorization of ShuffleVectorInst. We choose the latter solution. 
--- .../lib/Transforms/Vectorize/SLPVectorizer.cpp | 4 +++- llvm/test/Transforms/SLPVectorizer/revec.ll | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index fb30d46cfda1b..c173fb248f44d 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6005,10 +6005,12 @@ void BoUpSLP::reorderTopToBottom() { if ((TE->State == TreeEntry::Vectorize || TE->State == TreeEntry::StridedVectorize) && isa<ExtractElementInst, ExtractValueInst, LoadInst, StoreInst, - InsertElementInst>(TE->getMainOp())) { + InsertElementInst, ShuffleVectorInst>(TE->getMainOp())) { assert(!TE->isAltShuffle() && "Alternate instructions are only supported by BinaryOperator " "and CastInst."); + assert(!isa<ShuffleVectorInst>(TE->getMainOp()) || + SLPReVec && "Only supported by REVEC."); // Build correct orders for extract{element,value}, loads and // stores. reorderOrder(TE->ReorderIndices, Mask); diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll index 208b042c70353..b160c0174c0a7 100644 --- a/llvm/test/Transforms/SLPVectorizer/revec.ll +++ b/llvm/test/Transforms/SLPVectorizer/revec.ll @@ -411,6 +411,24 @@ entry: } define void @test13(<8 x i32> %0, ptr %out0, ptr %out1, ptr %out2) { +; CHECK-LABEL: @test13( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> poison, <8 x i32> [[TMP0:%.*]], i64 0) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <32 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <16 x i32> +; CHECK-NEXT: br label [[FOR_END_LOOPEXIT:%.*]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[TMP5:%.*]] = phi <16 x i32> [ [[TMP4]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i32> 
@llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP5]], i64 12) +; CHECK-NEXT: [[OR0:%.*]] = or <4 x i32> [[TMP6]], zeroinitializer +; CHECK-NEXT: store <4 x i32> [[OR0]], ptr [[OUT0:%.*]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP4]], i64 0) +; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[OUT1:%.*]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP4]], i64 8) +; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[OUT2:%.*]], align 4 +; CHECK-NEXT: ret void +; entry: %1 = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> %2 = shufflevector <8 x i32> %0, <8 x i32> zeroinitializer, <4 x i32> From acb24760fbaab2ef8c6d0f9ab170dd06e3a3cc04 Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Fri, 22 Nov 2024 03:18:35 -0800 Subject: [PATCH 3/3] apply comment --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index c173fb248f44d..eb86480266b85 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6004,13 +6004,12 @@ void BoUpSLP::reorderTopToBottom() { } if ((TE->State == TreeEntry::Vectorize || TE->State == TreeEntry::StridedVectorize) && - isa<ExtractElementInst, ExtractValueInst, LoadInst, StoreInst, - InsertElementInst, ShuffleVectorInst>(TE->getMainOp())) { + (isa<ExtractElementInst, ExtractValueInst, LoadInst, StoreInst, + InsertElementInst>(TE->getMainOp()) || + (SLPReVec && isa<ShuffleVectorInst>(TE->getMainOp())))) { assert(!TE->isAltShuffle() && "Alternate instructions are only supported by BinaryOperator " "and CastInst."); - assert(!isa<ShuffleVectorInst>(TE->getMainOp()) || - SLPReVec && "Only supported by REVEC."); // Build correct orders for extract{element,value}, loads and // stores. reorderOrder(TE->ReorderIndices, Mask);