Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 16 additions & 52 deletions llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -322,14 +322,12 @@ class Vectorizer {
/// !IsLoad) to ChainBegin -- i.e. there are no intervening may-alias
/// instructions.
///
/// The map ChainElemOffsets must contain all of the elements in
/// [ChainBegin, ChainElem] and their offsets from some arbitrary base
/// address. It's ok if it contains additional entries.
/// ChainSet must contain all of the elements in [ChainBegin, ChainElem].
/// It's ok if it contains additional entries.
template <bool IsLoadChain>
bool isSafeToMove(
Instruction *ChainElem, Instruction *ChainBegin,
const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
BatchAAResults &BatchAA);
bool isSafeToMove(Instruction *ChainElem, Instruction *ChainBegin,
const DenseSet<Instruction *> &ChainSet,
BatchAAResults &BatchAA);

/// Merges the equivalence classes if they have underlying objects that differ
/// by one level of indirection (i.e., one is a getelementptr and the other is
Expand Down Expand Up @@ -579,9 +577,9 @@ std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &C) {
// We know that elements in the chain with non-overlapping offsets can't
// alias, but AA may not be smart enough to figure this out. Use a
// hashtable.
DenseMap<Instruction *, APInt /*OffsetFromLeader*/> ChainOffsets;
DenseSet<Instruction *> ChainSet;
for (const auto &E : C)
ChainOffsets.insert({&*E.Inst, E.OffsetFromLeader});
ChainSet.insert(E.Inst);

// Across a single invocation of this function the IR is not changing, so
// using a batched Alias Analysis is safe and can reduce compile time.
Expand Down Expand Up @@ -612,8 +610,8 @@ std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &C) {
SmallVector<ChainElem, 1> NewChain;
NewChain.emplace_back(*ChainBegin);
for (auto ChainIt = std::next(ChainBegin); ChainIt != ChainEnd; ++ChainIt) {
if (isSafeToMove<IsLoad>(ChainIt->Inst, NewChain.front().Inst,
ChainOffsets, BatchAA)) {
if (isSafeToMove<IsLoad>(ChainIt->Inst, NewChain.front().Inst, ChainSet,
BatchAA)) {
LLVM_DEBUG(dbgs() << "LSV: No intervening may-alias instrs; can merge "
<< *ChainIt->Inst << " into " << *ChainBegin->Inst
<< "\n");
Expand Down Expand Up @@ -1264,10 +1262,9 @@ bool Vectorizer::vectorizeChain(Chain &C) {
}

template <bool IsLoadChain>
bool Vectorizer::isSafeToMove(
Instruction *ChainElem, Instruction *ChainBegin,
const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets,
BatchAAResults &BatchAA) {
bool Vectorizer::isSafeToMove(Instruction *ChainElem, Instruction *ChainBegin,
const DenseSet<Instruction *> &ChainSet,
BatchAAResults &BatchAA) {
LLVM_DEBUG(dbgs() << "LSV: isSafeToMove(" << *ChainElem << " -> "
<< *ChainBegin << ")\n");

Expand All @@ -1293,10 +1290,6 @@ bool Vectorizer::isSafeToMove(
return BasicBlock::iterator(ChainBegin);
}());

const APInt &ChainElemOffset = ChainOffsets.at(ChainElem);
const unsigned ChainElemSize =
DL.getTypeStoreSize(getLoadStoreType(ChainElem));

for (; BBIt != BBItEnd; ++BBIt) {
Instruction *I = &*BBIt;

Expand All @@ -1311,39 +1304,10 @@ bool Vectorizer::isSafeToMove(
if (!IsLoadChain && isInvariantLoad(I))
continue;

// If I is in the chain, we can tell whether it aliases ChainElem by checking
// what offset ChainElem accesses. This may be better than AA is able to do.
//
// We should really only have duplicate offsets for stores (the duplicate
// loads should be CSE'ed), but in case we have a duplicate load, we'll
// split the chain so we don't have to handle this case specially.
if (auto OffsetIt = ChainOffsets.find(I); OffsetIt != ChainOffsets.end()) {
// I and ChainElem overlap if:
// - I and ChainElem have the same offset, OR
// - I's offset is less than ChainElem's, but I touches past the
// beginning of ChainElem, OR
// - ChainElem's offset is less than I's, but ChainElem touches past the
// beginning of I.
const APInt &IOffset = OffsetIt->second;
unsigned IElemSize = DL.getTypeStoreSize(getLoadStoreType(I));
if (IOffset == ChainElemOffset ||
(IOffset.sle(ChainElemOffset) &&
(IOffset + IElemSize).sgt(ChainElemOffset)) ||
(ChainElemOffset.sle(IOffset) &&
(ChainElemOffset + ChainElemSize).sgt(OffsetIt->second))) {
LLVM_DEBUG({
// Double check that AA also sees this alias. If not, we probably
// have a bug.
ModRefInfo MR =
BatchAA.getModRefInfo(I, MemoryLocation::get(ChainElem));
assert(IsLoadChain ? isModSet(MR) : isModOrRefSet(MR));
dbgs() << "LSV: Found alias in chain: " << *I << "\n";
});
return false; // We found an aliasing instruction; bail.
}

continue; // We're confident there's no alias.
}
// Allow on-chain aliasing because write-order is preserved when stores are
// vectorized.
if (ChainSet.count(I))
continue;

LLVM_DEBUG(dbgs() << "LSV: Querying AA for " << *I << "\n");
ModRefInfo MR = BatchAA.getModRefInfo(I, MemoryLocation::get(ChainElem));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ define amdgpu_ps void @test_div_scale(float %arg0, float %arg1) {
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), -1
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[INT1]](s1)
; CHECK-NEXT: G_STORE [[INT]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1)
; CHECK-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
%call = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %arg0, float %arg1, i1 true)
Expand Down
7 changes: 1 addition & 6 deletions llvm/test/CodeGen/AMDGPU/function-args-inreg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1696,8 +1696,6 @@ define void @void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg
; GFX9-LABEL: void_func_i32_v2float_inreg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s16
; GFX9-NEXT: global_store_dword v[0:1], v0, off
; GFX9-NEXT: v_mov_b32_e32 v0, s17
; GFX9-NEXT: v_mov_b32_e32 v1, s18
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
Expand All @@ -1707,10 +1705,7 @@ define void @void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg
; GFX11-LABEL: void_func_i32_v2float_inreg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: v_mov_b32_e32 v0, s1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
store i32 %arg0, ptr addrspace(1) poison
Expand Down
Loading