diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.h new file mode 100644 index 0000000000000..3527807867846 --- /dev/null +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.h @@ -0,0 +1,50 @@ +//===- StructInitVec.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A pass that vectorizes struct initializations. +// Generic bottom-up vectorization cannot handle these because the +// initialization instructions can be of different types. +// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_STRUCTINITVEC_H +#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_STRUCTINITVEC_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/SandboxIR/Pass.h" + +namespace llvm { + +class DataLayout; + +namespace sandboxir { + +class Value; +class Instruction; +class Scheduler; +class Type; + +class StructInitVec final : public RegionPass { + const DataLayout *DL = nullptr; + /// Checks legality of vectorization and \returns the vector type on success, + /// nullopt otherwise. 
+ std::optional canVectorize(ArrayRef Bndl, + Scheduler &Sched); + + void tryEraseDeadInstrs(ArrayRef Stores, + ArrayRef Loads); + +public: + StructInitVec() : RegionPass("struct-init-vec") {} + bool runOnRegion(Region &Rgn, const Analyses &A) final; +}; + +} // namespace sandboxir + +} // namespace llvm + +#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_STRUCTINITVEC_H diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h index d32bfbaf7a4c8..3d57a2f574bb8 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h @@ -67,8 +67,8 @@ class VecUtils { return *Diff == ElmBytes; } - template - static bool areConsecutive(ArrayRef &Bndl, ScalarEvolution &SE, + template + static bool areConsecutive(ArrayRef Bndl, ScalarEvolution &SE, const DataLayout &DL) { static_assert(std::is_same::value || std::is_same::value, @@ -85,6 +85,11 @@ class VecUtils { } return true; } + template + static bool areConsecutive(ArrayRef Bndl, ScalarEvolution &SE, + const DataLayout &DL) { + return areConsecutive(Bndl, SE, DL); + } /// \Returns the number of vector lanes of \p Ty or 1 if not a vector. /// NOTE: It asserts that \p Ty is a fixed vector type. @@ -119,6 +124,35 @@ class VecUtils { } return FixedVectorType::get(ElemTy, NumElts); } + /// \Returns the combined vector type for \p Bndl, even when the element types + /// differ. For example: i8,i8,i16 will return <4 x i8>. \Returns null if + /// types are of mixed float/integer types. 
+ static Type *getCombinedVectorTypeFor(ArrayRef Bndl, + const DataLayout &DL) { + assert(!Bndl.empty() && "Expected non-empty Bndl!"); + unsigned TotalBits = 0; + unsigned MinElmBits = std::numeric_limits::max(); + Type *MinElmTy = nullptr; + bool LastIsFloat = false; + for (auto [Idx, V] : enumerate(Bndl)) { + Type *ElmTy = getElementType(Utils::getExpectedType(V)); + + // Reject mixed integer/float types. + bool IsFloat = ElmTy->isFloatingPointTy(); + if (Idx != 0 && IsFloat != LastIsFloat) + return nullptr; + LastIsFloat = IsFloat; + + unsigned ElmBits = Utils::getNumBits(ElmTy, DL); + TotalBits += ElmBits * VecUtils::getNumLanes(V); + // Remember the narrowest element type seen so far; it is used as the + // element type of the returned vector. + if (ElmBits < MinElmBits) { + MinElmBits = ElmBits; + MinElmTy = ElmTy; + } + } + // NOTE(review): integer division truncates, so if TotalBits is not a + // multiple of MinElmBits the returned vector covers fewer bits than the + // bundle - confirm callers never pass such element widths. + unsigned NumElms = TotalBits / MinElmBits; + return FixedVectorType::get(MinElmTy, NumElms); + } /// \Returns the instruction in \p Instrs that is lowest in the BB. Expects /// that all instructions are in the same BB. static Instruction *getLowest(ArrayRef Instrs) { diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt index 96670fe3ea195..c16056aebdf3f 100644 --- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt +++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt @@ -13,6 +13,7 @@ add_llvm_component_library(LLVMVectorize SandboxVectorizer/Passes/RegionsFromBBs.cpp SandboxVectorizer/Passes/RegionsFromMetadata.cpp SandboxVectorizer/Passes/SeedCollection.cpp + SandboxVectorizer/Passes/StructInitVec.cpp SandboxVectorizer/Passes/TransactionAcceptOrRevert.cpp SandboxVectorizer/Passes/TransactionSave.cpp SandboxVectorizer/SandboxVectorizer.cpp diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def index 02b973926854d..657778cfd2b62 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def @@ -26,6 +26,7 
@@ REGION_PASS("tr-accept", ::llvm::sandboxir::TransactionAlwaysAccept) REGION_PASS("tr-revert", ::llvm::sandboxir::TransactionAlwaysRevert) REGION_PASS("tr-accept-or-revert", ::llvm::sandboxir::TransactionAcceptOrRevert) REGION_PASS("bottom-up-vec", ::llvm::sandboxir::BottomUpVec) +REGION_PASS("struct-init-vec", ::llvm::sandboxir::StructInitVec) #undef REGION_PASS diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.cpp new file mode 100644 index 0000000000000..a895619995294 --- /dev/null +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.cpp @@ -0,0 +1,171 @@ +//===- StructInitVec.cpp - Vectorizer pass for struct initializations -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.h" +#include "llvm/SandboxIR/Module.h" +#include "llvm/SandboxIR/Region.h" +#include "llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h" +#include "llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h" +#include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h" + +namespace llvm { + +namespace sandboxir { + +std::optional StructInitVec::canVectorize(ArrayRef Bndl, + Scheduler &Sched) { + // Check if in the same BB. + auto *BB = cast(Bndl[0])->getParent(); + if (any_of(drop_begin(Bndl), + [BB](auto *V) { return cast(V)->getParent() != BB; })) + return std::nullopt; + + // Check if instructions repeat. + SmallPtrSet Unique(Bndl.begin(), Bndl.end()); + if (Unique.size() != Bndl.size()) + return std::nullopt; + + // Don't mix integer with floating point. 
+ bool IsFloat = false; + bool IsInteger = false; + for (auto *I : Bndl) { + // Classify every bundle member by its own element type (not just the + // first member), so that vectors of floats count as floating point. + Type *ElmTy = VecUtils::getElementType(Utils::getExpectedType(I)); + if (ElmTy->isFloatingPointTy()) + IsFloat = true; + else + IsInteger = true; + } + if (IsFloat && IsInteger) + return std::nullopt; + + Type *VecTy = VecUtils::getCombinedVectorTypeFor(Bndl, *DL); + if (VecTy == nullptr) + return std::nullopt; + + // Check scheduling. + if (!Sched.trySchedule(Bndl)) + return std::nullopt; + + return VecTy; +} + +/// Erases the scalar \p Stores and \p Loads, and then erases any instruction +/// that was used as their pointer operand and is left without uses. +void StructInitVec::tryEraseDeadInstrs(ArrayRef Stores, + ArrayRef Loads) { + SmallPtrSet DeadCandidates; + for (auto *SI : Stores) { + if (auto *PtrI = + dyn_cast(cast(SI)->getPointerOperand())) + DeadCandidates.insert(PtrI); + SI->eraseFromParent(); + } + for (auto *LI : Loads) { + if (auto *PtrI = + dyn_cast(cast(LI)->getPointerOperand())) + DeadCandidates.insert(PtrI); + cast(LI)->eraseFromParent(); + } + for (auto *PtrI : DeadCandidates) + if (!PtrI->hasNUsesOrMore(1)) + PtrI->eraseFromParent(); +} + +/// Tries to replace the bundle held in the region's aux vector with a single +/// vector store that is fed by either a vector load or a constant vector. +/// \Returns true if the bundle was vectorized. +bool StructInitVec::runOnRegion(Region &Rgn, const Analyses &A) { + SmallVector Bndl(Rgn.getAux().begin(), Rgn.getAux().end()); + assert(Bndl.size() >= 2 && "Bad slice!"); + Function &F = *Bndl[0]->getParent()->getParent(); + DL = &F.getParent()->getDataLayout(); + auto &Ctx = F.getContext(); + Scheduler Sched(A.getAA(), Ctx); + if (!VecUtils::areConsecutive( + Bndl, A.getScalarEvolution(), *DL)) + return false; + if (!canVectorize(Bndl, Sched)) + return false; + + SmallVector Operands; + Operands.reserve(Bndl.size()); + for (auto *I : Bndl) { + auto *Op = cast(I)->getValueOperand(); + Operands.push_back(Op); + } + BasicBlock *BB = Bndl[0]->getParent(); + bool AllLoads = all_of(Operands, [BB](Value *V) { + auto *LI = dyn_cast(V); + if (LI == nullptr) + return false; + // TODO: For now we don't cross BBs. 
+ if (LI->getParent() != BB) + return false; + if (LI->hasNUsesOrMore(2)) + return false; + return true; + }); + bool AllConstants = + all_of(Operands, [](Value *V) { return isa(V); }); + if (!AllLoads && !AllConstants) + return false; + + Value *VecOp = nullptr; + SmallVector Loads; + if (AllLoads) { + // TODO: Try to avoid the extra copy to an instruction vector. + Loads.reserve(Operands.size()); + for (Value *Op : Operands) + Loads.push_back(cast(Op)); + + bool Consecutive = VecUtils::areConsecutive( + Loads, A.getScalarEvolution(), *DL); + if (!Consecutive) + return false; + if (!canVectorize(Loads, Sched)) + return false; + + // Generate vector load. + // The vector type is computed from the store bundle (Bndl), not from Loads. + Type *Ty = VecUtils::getCombinedVectorTypeFor(Bndl, *DL); + Value *LdPtr = cast(Loads[0])->getPointerOperand(); + // TODO: Compute alignment. + Align LdAlign(1); + auto LdWhereIt = std::next(VecUtils::getLowest(Loads)->getIterator()); + VecOp = LoadInst::create(Ty, LdPtr, LdAlign, LdWhereIt, Ctx, "VecIinitL"); + } else if (AllConstants) { + SmallVector Constants; + Constants.reserve(Operands.size()); + for (Value *Op : Operands) { + auto *COp = cast(Op); + if (auto *AggrCOp = dyn_cast(COp)) { + // If the operand is a constant aggregate, then append all its elements. + for (Value *Elm : AggrCOp->operands()) + Constants.push_back(cast(Elm)); + } else if (auto *SeqCOp = dyn_cast(COp)) { + for (auto ElmIdx : seq(SeqCOp->getNumElements())) + Constants.push_back(SeqCOp->getElementAsConstant(ElmIdx)); + } else if (auto *Zero = dyn_cast(COp)) { + auto *ZeroElm = Zero->getSequentialElement(); + for (auto ElmIdx : + seq(Zero->getElementCount().getFixedValue())) + Constants.push_back(ZeroElm); + } else { + // Any other constant is appended unchanged as a single element. + Constants.push_back(COp); + } + } + // NOTE(review): each scalar constant contributes exactly one entry of its + // own type, so a bundle with mixed element widths yields differently typed + // entries - confirm that ConstantVector::get() then produces the type + // computed by getCombinedVectorTypeFor(). + VecOp = ConstantVector::get(Constants); + } + + // Generate vector store. + Value *StPtr = cast(Bndl[0])->getPointerOperand(); + // TODO: Compute alignment. 
+ Align StAlign(1); + auto StWhereIt = std::next(VecUtils::getLowest(Bndl)->getIterator()); + StoreInst::create(VecOp, StPtr, StAlign, StWhereIt, Ctx); + + tryEraseDeadInstrs(Bndl, Loads); + return true; +} + +} // namespace sandboxir + +} // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp index 8432b066f966c..e168ddab9baba 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp @@ -8,6 +8,7 @@ #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromBBs.h" #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromMetadata.h" #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/SeedCollection.h" +#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.h" #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/TransactionAcceptOrRevert.h" #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/TransactionAlwaysAccept.h" #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/TransactionAlwaysRevert.h" diff --git a/llvm/test/Transforms/SandboxVectorizer/struct_init_vec.ll b/llvm/test/Transforms/SandboxVectorizer/struct_init_vec.ll new file mode 100644 index 0000000000000..6f8dd54404e98 --- /dev/null +++ b/llvm/test/Transforms/SandboxVectorizer/struct_init_vec.ll @@ -0,0 +1,343 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection<(enable-diff-types)struct-init-vec>" %s -S | FileCheck %s + +define void @struct_init_basic(ptr %ptr) { +; CHECK-LABEL: define void @struct_init_basic( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], 
i32 0 +; CHECK-NEXT: [[VECIINITL:%.*]] = load <4 x i8>, ptr [[PTR0]], align 1, !sandboxvec [[META0:![0-9]+]] +; CHECK-NEXT: store <4 x i8> [[VECIINITL]], ptr [[PTR0]], align 1, !sandboxvec [[META0]] +; CHECK-NEXT: ret void +; + %ptr0 = getelementptr i8, ptr %ptr, i32 0 + %ptr1 = getelementptr i8, ptr %ptr, i32 1 + %ptr2 = getelementptr i8, ptr %ptr, i32 2 + %ld0 = load i8, ptr %ptr0 + %ld1 = load i8, ptr %ptr1 + %ld2 = load i16, ptr %ptr2 + store i8 %ld0, ptr %ptr0 + store i8 %ld1, ptr %ptr1 + store i16 %ld2, ptr %ptr2 + ret void +} + +define void @struct_init_non_pow2(ptr %ptr) { +; CHECK-LABEL: define void @struct_init_non_pow2( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0 +; CHECK-NEXT: [[VECIINITL:%.*]] = load <3 x i8>, ptr [[PTR0]], align 1, !sandboxvec [[META1:![0-9]+]] +; CHECK-NEXT: store <3 x i8> [[VECIINITL]], ptr [[PTR0]], align 1, !sandboxvec [[META1]] +; CHECK-NEXT: ret void +; + %ptr0 = getelementptr i8, ptr %ptr, i32 0 + %ptr1 = getelementptr i8, ptr %ptr, i32 1 + %ld0 = load i8, ptr %ptr0 + %ld1 = load i16, ptr %ptr1 + store i8 %ld0, ptr %ptr0 + store i16 %ld1, ptr %ptr1 + ret void +} + +define void @struct_init_vectorize_vectors(ptr %ptr) { +; CHECK-LABEL: define void @struct_init_vectorize_vectors( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0 +; CHECK-NEXT: [[VECIINITL:%.*]] = load <4 x i8>, ptr [[PTR0]], align 1, !sandboxvec [[META2:![0-9]+]] +; CHECK-NEXT: store <4 x i8> [[VECIINITL]], ptr [[PTR0]], align 1, !sandboxvec [[META2]] +; CHECK-NEXT: ret void +; + %ptr0 = getelementptr i8, ptr %ptr, i32 0 + %ptr1 = getelementptr i8, ptr %ptr, i32 2 + %ld0 = load <2 x i8>, ptr %ptr0 + %ld1 = load <2 x i8>, ptr %ptr1 + store <2 x i8> %ld0, ptr %ptr0 + store <2 x i8> %ld1, ptr %ptr1 + ret void +} + +define void @struct_init_vectorize_vectors_diff_types(ptr %ptr) { +; CHECK-LABEL: define void @struct_init_vectorize_vectors_diff_types( +; 
CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0 +; CHECK-NEXT: [[VECIINITL:%.*]] = load <8 x i8>, ptr [[PTR0]], align 1, !sandboxvec [[META3:![0-9]+]] +; CHECK-NEXT: store <8 x i8> [[VECIINITL]], ptr [[PTR0]], align 1, !sandboxvec [[META3]] +; CHECK-NEXT: ret void +; + %ptr0 = getelementptr i8, ptr %ptr, i32 0 + %ptr1 = getelementptr i8, ptr %ptr, i32 2 + %ptr2 = getelementptr i8, ptr %ptr, i32 4 + %ld0 = load i16, ptr %ptr0 + %ld1 = load <2 x i8>, ptr %ptr1 + %ld2 = load <2 x i16>, ptr %ptr2 + store i16 %ld0, ptr %ptr0 + store <2 x i8> %ld1, ptr %ptr1 + store <2 x i16> %ld2, ptr %ptr2 + ret void +} + +; Don't vectorize if there is a gap. +define void @struct_init_gap(ptr %ptr) { +; CHECK-LABEL: define void @struct_init_gap( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0 +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr i8, ptr [[PTR]], i32 2 +; CHECK-NEXT: [[LD0:%.*]] = load i8, ptr [[PTR0]], align 1 +; CHECK-NEXT: [[LD1:%.*]] = load i16, ptr [[PTR1]], align 2 +; CHECK-NEXT: store i8 [[LD0]], ptr [[PTR0]], align 1, !sandboxvec [[META4:![0-9]+]] +; CHECK-NEXT: store i16 [[LD1]], ptr [[PTR1]], align 2, !sandboxvec [[META4]] +; CHECK-NEXT: ret void +; + %ptr0 = getelementptr i8, ptr %ptr, i32 0 + %ptr1 = getelementptr i8, ptr %ptr, i32 2 + %ld0 = load i8, ptr %ptr0 + %ld1 = load i16, ptr %ptr1 + store i8 %ld0, ptr %ptr0 + store i16 %ld1, ptr %ptr1 + ret void +} + +define void @struct_init_loads_not_consecutive(ptr %ptr) { +; CHECK-LABEL: define void @struct_init_loads_not_consecutive( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0 +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr i8, ptr [[PTR]], i32 2 +; CHECK-NEXT: [[LD0:%.*]] = load i8, ptr [[PTR1]], align 1 +; CHECK-NEXT: [[LD1:%.*]] = load i16, ptr [[PTR0]], align 2 +; CHECK-NEXT: store i8 [[LD0]], ptr [[PTR0]], align 1, !sandboxvec [[META5:![0-9]+]] +; CHECK-NEXT: 
store i16 [[LD1]], ptr [[PTR1]], align 2, !sandboxvec [[META5]] +; CHECK-NEXT: ret void +; + %ptr0 = getelementptr i8, ptr %ptr, i32 0 + %ptr1 = getelementptr i8, ptr %ptr, i32 2 + %ld0 = load i8, ptr %ptr1 + %ld1 = load i16, ptr %ptr0 + store i8 %ld0, ptr %ptr0 + store i16 %ld1, ptr %ptr1 + ret void +} + +; Vectorize same types, even if bottom-up-vec could do so too. +define void @struct_init_same_types(ptr %ptr) { +; CHECK-LABEL: define void @struct_init_same_types( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0 +; CHECK-NEXT: [[VECIINITL:%.*]] = load <2 x i8>, ptr [[PTR0]], align 1, !sandboxvec [[META6:![0-9]+]] +; CHECK-NEXT: store <2 x i8> [[VECIINITL]], ptr [[PTR0]], align 1, !sandboxvec [[META6]] +; CHECK-NEXT: ret void +; + %ptr0 = getelementptr i8, ptr %ptr, i32 0 + %ptr1 = getelementptr i8, ptr %ptr, i32 1 + %ld0 = load i8, ptr %ptr0 + %ld1 = load i8, ptr %ptr1 + store i8 %ld0, ptr %ptr0 + store i8 %ld1, ptr %ptr1 + ret void +} + +; Don't vectorize mixed integers/floats. 
+define void @struct_init_mixed_int_float(ptr %ptr) { +; CHECK-LABEL: define void @struct_init_mixed_int_float( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i32, ptr [[PTR]], i32 0 +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr i32, ptr [[PTR]], i32 1 +; CHECK-NEXT: [[LD0:%.*]] = load i32, ptr [[PTR0]], align 4 +; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[PTR1]], align 4 +; CHECK-NEXT: store i32 [[LD0]], ptr [[PTR0]], align 4, !sandboxvec [[META7:![0-9]+]] +; CHECK-NEXT: store float [[LD1]], ptr [[PTR1]], align 4, !sandboxvec [[META7]] +; CHECK-NEXT: ret void +; + %ptr0 = getelementptr i32, ptr %ptr, i32 0 + %ptr1 = getelementptr i32, ptr %ptr, i32 1 + %ld0 = load i32, ptr %ptr0 + %ld1 = load float, ptr %ptr1 + store i32 %ld0, ptr %ptr0 + store float %ld1, ptr %ptr1 + ret void +} + +define void @struct_init_mixed_int_float_vectors(ptr %ptr) { +; CHECK-LABEL: define void @struct_init_mixed_int_float_vectors( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i32, ptr [[PTR]], i32 0 +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr i32, ptr [[PTR]], i32 1 +; CHECK-NEXT: [[LD0:%.*]] = load i32, ptr [[PTR0]], align 4 +; CHECK-NEXT: [[LD1:%.*]] = load <2 x float>, ptr [[PTR1]], align 8 +; CHECK-NEXT: store i32 [[LD0]], ptr [[PTR0]], align 4, !sandboxvec [[META8:![0-9]+]] +; CHECK-NEXT: store <2 x float> [[LD1]], ptr [[PTR1]], align 8, !sandboxvec [[META8]] +; CHECK-NEXT: ret void +; + %ptr0 = getelementptr i32, ptr %ptr, i32 0 + %ptr1 = getelementptr i32, ptr %ptr, i32 1 + %ld0 = load i32, ptr %ptr0 + %ld1 = load <2 x float>, ptr %ptr1 + store i32 %ld0, ptr %ptr0 + store <2 x float> %ld1, ptr %ptr1 + ret void +} + +; Don't cross BBs (for now). 
+define ptr @struct_init_dont_cross_bbs(ptr %ptr) { +; CHECK-LABEL: define ptr @struct_init_dont_cross_bbs( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0 +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr i8, ptr [[PTR]], i32 1 +; CHECK-NEXT: [[LD0:%.*]] = load i8, ptr [[PTR0]], align 1 +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[PTR1]], align 1 +; CHECK-NEXT: br label %[[BB:.*]] +; CHECK: [[BB]]: +; CHECK-NEXT: store i8 [[LD0]], ptr [[PTR0]], align 1, !sandboxvec [[META9:![0-9]+]] +; CHECK-NEXT: store i8 [[LD1]], ptr [[PTR1]], align 1, !sandboxvec [[META9]] +; CHECK-NEXT: ret ptr [[PTR1]] +; +entry: + %ptr0 = getelementptr i8, ptr %ptr, i32 0 + %ptr1 = getelementptr i8, ptr %ptr, i32 1 + %ld0 = load i8, ptr %ptr0 + %ld1 = load i8, ptr %ptr1 + br label %bb + +bb: + store i8 %ld0, ptr %ptr0 + store i8 %ld1, ptr %ptr1 + ret ptr %ptr1 +} + +; Check that all dead GEPs are removed. +define void @struct_init_cleanup_geps(ptr %ptrA, ptr %ptrB) { +; CHECK-LABEL: define void @struct_init_cleanup_geps( +; CHECK-SAME: ptr [[PTRA:%.*]], ptr [[PTRB:%.*]]) { +; CHECK-NEXT: [[PTRA0:%.*]] = getelementptr i8, ptr [[PTRA]], i32 0 +; CHECK-NEXT: [[PTRB0:%.*]] = getelementptr i8, ptr [[PTRB]], i32 0 +; CHECK-NEXT: [[VECIINITL:%.*]] = load <2 x i8>, ptr [[PTRA0]], align 1, !sandboxvec [[META10:![0-9]+]] +; CHECK-NEXT: store <2 x i8> [[VECIINITL]], ptr [[PTRB0]], align 1, !sandboxvec [[META10]] +; CHECK-NEXT: ret void +; + %ptrA0 = getelementptr i8, ptr %ptrA, i32 0 + %ptrA1 = getelementptr i8, ptr %ptrA, i32 1 + %ptrB0 = getelementptr i8, ptr %ptrB, i32 0 + %ptrB1 = getelementptr i8, ptr %ptrB, i32 1 + %ld0 = load i8, ptr %ptrA0 + %ld1 = load i8, ptr %ptrA1 + store i8 %ld0, ptr %ptrB0 + store i8 %ld1, ptr %ptrB1 + ret void +} + +; Check that we don't try to erase GEPs with other users. 
+define ptr @struct_init_cleanup_gep_with_external_use(ptr %ptr) { +; CHECK-LABEL: define ptr @struct_init_cleanup_gep_with_external_use( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0 +; CHECK-NEXT: [[PTR1:%.*]] = getelementptr i8, ptr [[PTR]], i32 1 +; CHECK-NEXT: [[VECIINITL:%.*]] = load <2 x i8>, ptr [[PTR0]], align 1, !sandboxvec [[META11:![0-9]+]] +; CHECK-NEXT: store <2 x i8> [[VECIINITL]], ptr [[PTR0]], align 1, !sandboxvec [[META11]] +; CHECK-NEXT: ret ptr [[PTR1]] +; + %ptr0 = getelementptr i8, ptr %ptr, i32 0 + %ptr1 = getelementptr i8, ptr %ptr, i32 1 + %ld0 = load i8, ptr %ptr0 + %ld1 = load i8, ptr %ptr1 + store i8 %ld0, ptr %ptr0 + store i8 %ld1, ptr %ptr1 + ret ptr %ptr1 +} + +; Check that we schedule both loads and stores +define void @struct_init_schedule_stores_and_loads(ptr noalias %ptrA, ptr noalias %ptrB) { +; CHECK-LABEL: define void @struct_init_schedule_stores_and_loads( +; CHECK-SAME: ptr noalias [[PTRA:%.*]], ptr noalias [[PTRB:%.*]]) { +; CHECK-NEXT: [[PTRA0:%.*]] = getelementptr i8, ptr [[PTRA]], i64 0 +; CHECK-NEXT: [[PTRB0:%.*]] = getelementptr i8, ptr [[PTRB]], i64 0 +; CHECK-NEXT: [[PTRB1:%.*]] = getelementptr i8, ptr [[PTRB]], i64 1 +; CHECK-NEXT: [[OTHER:%.*]] = load i8, ptr [[PTRB1]], align 1 +; CHECK-NEXT: [[VECIINITL:%.*]] = load <2 x i8>, ptr [[PTRA0]], align 1, !sandboxvec [[META12:![0-9]+]] +; CHECK-NEXT: store i8 0, ptr [[PTRA0]], align 1 +; CHECK-NEXT: store <2 x i8> [[VECIINITL]], ptr [[PTRB0]], align 1, !sandboxvec [[META12]] +; CHECK-NEXT: ret void +; + %ptrA0 = getelementptr i8, ptr %ptrA, i64 0 + %ptrA1 = getelementptr i8, ptr %ptrA, i64 1 + %ptrB0 = getelementptr i8, ptr %ptrB, i64 0 + %ptrB1 = getelementptr i8, ptr %ptrB, i64 1 + + %ld0 = load i8, ptr %ptrA0 + store i8 %ld0, ptr %ptrB0 + + store i8 0, ptr %ptrA0 + %other = load i8, ptr %ptrB1 + + %ld1 = load i8, ptr %ptrA1 + store i8 %ld1, ptr %ptrB1 + ret void +} + +; Store-constant pattern. 
+define void @struct_init_constants(ptr %ptr) { +; CHECK-LABEL: define void @struct_init_constants( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0 +; CHECK-NEXT: store <3 x i8> , ptr [[PTR0]], align 1, !sandboxvec [[META13:![0-9]+]] +; CHECK-NEXT: ret void +; + %ptr0 = getelementptr i8, ptr %ptr, i32 0 + %ptr1 = getelementptr i8, ptr %ptr, i32 1 + %ptr2 = getelementptr i8, ptr %ptr, i32 3 + store i8 42, ptr %ptr0 + store i16 43, ptr %ptr1 + store i8 44, ptr %ptr2 + ret void +} + +; Same but with ConstantDataSequential. +define void @struct_init_constants_CDS(ptr %ptr) { +; CHECK-LABEL: define void @struct_init_constants_CDS( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0 +; CHECK-NEXT: store <4 x i8> , ptr [[PTR0]], align 1, !sandboxvec [[META14:![0-9]+]] +; CHECK-NEXT: ret void +; + %ptr0 = getelementptr i8, ptr %ptr, i32 0 + %ptr1 = getelementptr i8, ptr %ptr, i32 1 + %ptr2 = getelementptr i8, ptr %ptr, i32 3 + store i8 0, ptr %ptr0 + store <2 x i8> , ptr %ptr1 + store i8 3, ptr %ptr2 + ret void +} + +; Same but with floats +define void @struct_init_constants_CDS_float(ptr %ptr) { +; CHECK-LABEL: define void @struct_init_constants_CDS_float( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0 +; CHECK-NEXT: store <8 x float> , ptr [[PTR0]], align 1, !sandboxvec [[META15:![0-9]+]] +; CHECK-NEXT: ret void +; + %ptr0 = getelementptr float, ptr %ptr, i32 0 + %ptr1 = getelementptr float, ptr %ptr, i32 1 + %ptr2 = getelementptr float, ptr %ptr, i32 3 + store float 1.0, ptr %ptr0 + store <2 x float> zeroinitializer, ptr %ptr1 + store <5 x float> zeroinitializer, ptr %ptr2 + ret void +} + +;. 
+; CHECK: [[META0]] = distinct !{!"sandboxregion"} +; CHECK: [[META1]] = distinct !{!"sandboxregion"} +; CHECK: [[META2]] = distinct !{!"sandboxregion"} +; CHECK: [[META3]] = distinct !{!"sandboxregion"} +; CHECK: [[META4]] = distinct !{!"sandboxregion"} +; CHECK: [[META5]] = distinct !{!"sandboxregion"} +; CHECK: [[META6]] = distinct !{!"sandboxregion"} +; CHECK: [[META7]] = distinct !{!"sandboxregion"} +; CHECK: [[META8]] = distinct !{!"sandboxregion"} +; CHECK: [[META9]] = distinct !{!"sandboxregion"} +; CHECK: [[META10]] = distinct !{!"sandboxregion"} +; CHECK: [[META11]] = distinct !{!"sandboxregion"} +; CHECK: [[META12]] = distinct !{!"sandboxregion"} +; CHECK: [[META13]] = distinct !{!"sandboxregion"} +; CHECK: [[META14]] = distinct !{!"sandboxregion"} +; CHECK: [[META15]] = distinct !{!"sandboxregion"} +;. diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp index 2bfea6908305c..03eb32fdf6002 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp @@ -18,6 +18,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/SandboxIR/Context.h" #include "llvm/SandboxIR/Function.h" +#include "llvm/SandboxIR/Module.h" #include "llvm/SandboxIR/Type.h" #include "llvm/Support/SourceMgr.h" #include "gmock/gmock.h" @@ -424,6 +425,80 @@ TEST_F(VecUtilsTest, GetWideType) { EXPECT_EQ(sandboxir::VecUtils::getWideType(Int32X4Ty, 2), Int32X8Ty); } +TEST_F(VecUtilsTest, GetCombinedVectorTypeFor) { + parseIR(R"IR( +define void @foo(ptr %ptr, i8 %i8, i16 %i16, i32 %i32, float %f32, double %f64, <2 x i8> %v2xi8, <2 x i16> %v2xi16) { + store i8 %i8, ptr %ptr + store i16 %i16, ptr %ptr + store i32 %i32, ptr %ptr + store float %f32, ptr %ptr + store double %f64, ptr %ptr + store <2 x i8> %v2xi8, ptr %ptr + store <2 x i16> %v2xi16, ptr %ptr + ret void +} +)IR"); + Function &LLVMF = 
*M->getFunction("foo"); + + sandboxir::Context Ctx(C); + auto &F = *Ctx.createFunction(&LLVMF); + auto &BB = *F.begin(); + const auto &DL = F.getParent()->getDataLayout(); + auto It = BB.begin(); + auto *Store_i8 = &*It++; + auto *Store_i16 = &*It++; + auto *Store_i32 = &*It++; + auto *Store_f32 = &*It++; + auto *Store_f64 = &*It++; + auto *Store_2xi8 = &*It++; + auto *Store_2xi16 = &*It++; + + auto *I8Ty = sandboxir::IntegerType::get(Ctx, 8); + auto *I16Ty = sandboxir::IntegerType::get(Ctx, 16); + auto *F32Ty = sandboxir::Type::getFloatTy(Ctx); + + // Check same type. + EXPECT_EQ( + sandboxir::VecUtils::getCombinedVectorTypeFor({Store_i8, Store_i8}, DL), + sandboxir::FixedVectorType::get(I8Ty, 2)); + EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor( + {Store_2xi8, Store_2xi8}, DL), + sandboxir::FixedVectorType::get(I8Ty, 4)); + + // Check different types, power-of-two. + EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor( + {Store_i8, Store_i8, Store_i16}, DL), + sandboxir::FixedVectorType::get(I8Ty, 4)); + EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor( + {Store_i8, Store_i8, Store_i16, Store_i32}, DL), + sandboxir::FixedVectorType::get(I8Ty, 8)); + EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor( + {Store_2xi8, Store_2xi8, Store_2xi16}, DL), + sandboxir::FixedVectorType::get(I8Ty, 8)); + + // Check different types non-power-of-two. + EXPECT_EQ( + sandboxir::VecUtils::getCombinedVectorTypeFor({Store_f32, Store_f64}, DL), + sandboxir::FixedVectorType::get(F32Ty, 3)); + EXPECT_EQ( + sandboxir::VecUtils::getCombinedVectorTypeFor({Store_i32, Store_i16}, DL), + sandboxir::FixedVectorType::get(I16Ty, 3)); + EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor( + {Store_i8, Store_i16, Store_i32}, DL), + sandboxir::FixedVectorType::get(I8Ty, 7)); + EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor( + {Store_i8, Store_i16, Store_2xi8}, DL), + sandboxir::FixedVectorType::get(I8Ty, 5)); + + // Mix float and integer. 
+ EXPECT_EQ( + sandboxir::VecUtils::getCombinedVectorTypeFor({Store_i32, Store_f32}, DL), + nullptr); + EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor( + {Store_f32, Store_2xi8}, DL), + nullptr); +} + TEST_F(VecUtilsTest, GetLowest) { parseIR(R"IR( define void @foo(i8 %v) {