[SandboxVec][Legality] Pack bundles that contain repeated instructions.

LegalityAnalysis::canVectorize() now returns a Pack result with the new
ResultReason::RepeatedInstrs when the same instruction appears more than once
in the bundle (splat or partial repeat), since such a bundle would require
some sort of broadcast. Adds the enum value, its ToStr spelling, a lit test
and unit-test coverage.

diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
index 156b788d8a203..132b12a7b4e6c 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
@@ -92,6 +92,7 @@ enum class ResultReason {
   DiffMathFlags,
   DiffWrapFlags,
   DiffBBs,
+  RepeatedInstrs,
   NotConsecutive,
   CantSchedule,
   Unimplemented,
@@ -130,6 +131,8 @@ struct ToStr {
       return "DiffWrapFlags";
     case ResultReason::DiffBBs:
       return "DiffBBs";
+    case ResultReason::RepeatedInstrs:
+      return "RepeatedInstrs";
     case ResultReason::NotConsecutive:
       return "NotConsecutive";
     case ResultReason::CantSchedule:
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp
index 48bc246e4b56a..62be90aee4e0e 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp
@@ -219,6 +219,10 @@ const LegalityResult &LegalityAnalysis::canVectorize(ArrayRef<Value *> Bndl,
   if (any_of(drop_begin(Bndl),
              [BB](auto *V) { return cast<Instruction>(V)->getParent() != BB; }))
     return createLegalityResult<Pack>(ResultReason::DiffBBs);
+  // Pack if instructions repeat, i.e., require some sort of broadcast.
+  SmallPtrSet<Value *, 8> Unique(Bndl.begin(), Bndl.end());
+  if (Unique.size() != Bndl.size())
+    return createLegalityResult<Pack>(ResultReason::RepeatedInstrs);
 
   auto CollectDescrs = getHowToCollectValues(Bndl);
   if (CollectDescrs.hasVectorInputs()) {
diff --git a/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll b/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll
new file mode 100644
index 0000000000000..6026e92ef9a82
--- /dev/null
+++ b/llvm/test/Transforms/SandboxVectorizer/repeated_instrs.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s
+
+define i32 @repeated_splat(ptr %ptr, i32 %v) #0 {
+; CHECK-LABEL: define i32 @repeated_splat(
+; CHECK-SAME: ptr [[PTR:%.*]], i32 [[V:%.*]]) {
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 0
+; CHECK-NEXT:    [[VECL:%.*]] = load <2 x i32>, ptr [[GEP0]], align 4
+; CHECK-NEXT:    [[SPLAT:%.*]] = add i32 [[V]], 0
+; CHECK-NEXT:    [[PACK:%.*]] = insertelement <2 x i32> poison, i32 [[SPLAT]], i32 0
+; CHECK-NEXT:    [[PACK1:%.*]] = insertelement <2 x i32> [[PACK]], i32 [[SPLAT]], i32 1
+; CHECK-NEXT:    [[VEC:%.*]] = mul <2 x i32> [[VECL]], [[PACK1]]
+; CHECK-NEXT:    store <2 x i32> [[VEC]], ptr [[GEP0]], align 4
+; CHECK-NEXT:    ret i32 0
+;
+  %gep0 = getelementptr inbounds i32, ptr %ptr, i64 0
+  %gep1 = getelementptr inbounds i32, ptr %ptr, i64 1
+  %ld0 = load i32, ptr %gep0, align 4
+  %ld1 = load i32, ptr %gep1, align 4
+  %splat = add i32 %v, 0
+  %add0 = mul i32 %ld0, %splat
+  %add1 = mul i32 %ld1, %splat
+  store i32 %add0, ptr %gep0, align 4
+  store i32 %add1, ptr %gep1, align 4
+  ret i32 0
+}
+
+define i32 @repeated_partial(ptr %ptr, i32 %v) #0 {
+; CHECK-LABEL: define i32 @repeated_partial(
+; CHECK-SAME: ptr [[PTR:%.*]], i32 [[V:%.*]]) {
+; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 0
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 1
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 3
+; CHECK-NEXT:    [[LD0:%.*]] = load i32, ptr [[GEP0]], align 4
+; CHECK-NEXT:    [[LD1:%.*]] = load i32, ptr [[GEP1]], align 4
+; CHECK-NEXT:    [[LD3:%.*]] = load i32, ptr [[GEP3]], align 4
+; CHECK-NEXT:    [[PACK:%.*]] = insertelement <4 x i32> poison, i32 [[LD0]], i32 0
+; CHECK-NEXT:    [[PACK1:%.*]] = insertelement <4 x i32> [[PACK]], i32 [[LD1]], i32 1
+; CHECK-NEXT:    [[PACK2:%.*]] = insertelement <4 x i32> [[PACK1]], i32 [[LD1]], i32 2
+; CHECK-NEXT:    [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[LD3]], i32 3
+; CHECK-NEXT:    [[VECL:%.*]] = load <4 x i32>, ptr [[GEP0]], align 4
+; CHECK-NEXT:    [[SPLAT:%.*]] = add i32 [[V]], 0
+; CHECK-NEXT:    [[VEC:%.*]] = mul <4 x i32> [[VECL]], [[PACK3]]
+; CHECK-NEXT:    store <4 x i32> [[VEC]], ptr [[GEP0]], align 4
+; CHECK-NEXT:    ret i32 0
+;
+  %gep0 = getelementptr inbounds i32, ptr %ptr, i64 0
+  %gep1 = getelementptr inbounds i32, ptr %ptr, i64 1
+  %gep2 = getelementptr inbounds i32, ptr %ptr, i64 2
+  %gep3 = getelementptr inbounds i32, ptr %ptr, i64 3
+  %ld0 = load i32, ptr %gep0, align 4
+  %ld1 = load i32, ptr %gep1, align 4
+  %ld2 = load i32, ptr %gep2, align 4
+  %ld3 = load i32, ptr %gep3, align 4
+  %splat = add i32 %v, 0
+  %add0 = mul i32 %ld0, %ld0
+  %add1 = mul i32 %ld1, %ld1
+  %add2 = mul i32 %ld2, %ld1
+  %add3 = mul i32 %ld3, %ld3
+  store i32 %add0, ptr %gep0, align 4
+  store i32 %add1, ptr %gep1, align 4
+  store i32 %add2, ptr %gep2, align 4
+  store i32 %add3, ptr %gep3, align 4
+  ret i32 0
+}
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp
index acc887f9dc6c1..3c24214f0d87f 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp
@@ -225,6 +225,22 @@ define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg, float
         Legality.canVectorize({Ld0, Ld1}, /*SkipScheduling=*/true);
     EXPECT_TRUE(isa<sandboxir::Widen>(Result));
   }
+  {
+    // Check Repeated instructions (splat)
+    const auto &Result =
+        Legality.canVectorize({Ld0, Ld0}, /*SkipScheduling=*/true);
+    EXPECT_TRUE(isa<sandboxir::Pack>(Result));
+    EXPECT_EQ(cast<sandboxir::Pack>(Result).getReason(),
+              sandboxir::ResultReason::RepeatedInstrs);
+  }
+  {
+    // Check Repeated instructions (not splat)
+    const auto &Result =
+        Legality.canVectorize({Ld0, Ld1, Ld0}, /*SkipScheduling=*/true);
+    EXPECT_TRUE(isa<sandboxir::Pack>(Result));
+    EXPECT_EQ(cast<sandboxir::Pack>(Result).getReason(),
+              sandboxir::ResultReason::RepeatedInstrs);
+  }
 }
 
 TEST_F(LegalityTest, LegalitySchedule) {