Skip to content

[InferAlignment] Propagate alignment between loads/stores of the same base pointer #145733

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 41 additions & 8 deletions llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,9 @@ class Vectorizer {
/// Postcondition: For all i, ret[i][0].second == 0, because the first instr
/// in the chain is the leader, and an instr touches distance 0 from itself.
std::vector<Chain> gatherChains(ArrayRef<Instruction *> Instrs);

/// Propagates the best known alignment among the accesses of a contiguous
/// chain to the other accesses in that chain, based on their relative offsets.
void propagateBestAlignmentsInChain(ArrayRef<ChainElem> C) const;
};

class LoadStoreVectorizerLegacyPass : public FunctionPass {
Expand Down Expand Up @@ -716,6 +719,14 @@ std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
unsigned VecRegBytes = TTI.getLoadStoreVecRegBitWidth(AS) / 8;

// We know that the accesses are contiguous. Propagate alignment
// information so that slices of the chain can still be vectorized.
propagateBestAlignmentsInChain(C);
LLVM_DEBUG({
dbgs() << "LSV: Chain after alignment propagation:\n";
dumpChain(C);
});

std::vector<Chain> Ret;
for (unsigned CBegin = 0; CBegin < C.size(); ++CBegin) {
// Find candidate chains of size not greater than the largest vector reg.
Expand Down Expand Up @@ -823,6 +834,7 @@ std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
<< Alignment.value() << " to " << NewAlign.value()
<< "\n");
Alignment = NewAlign;
setLoadStoreAlignment(C[CBegin].Inst, Alignment);
}
}

Expand Down Expand Up @@ -880,14 +892,6 @@ bool Vectorizer::vectorizeChain(Chain &C) {
VecElemTy, 8 * ChainBytes / DL.getTypeSizeInBits(VecElemTy));

Align Alignment = getLoadStoreAlignment(C[0].Inst);
// If this is a load/store of an alloca, we might have upgraded the alloca's
// alignment earlier. Get the new alignment.
if (AS == DL.getAllocaAddrSpace()) {
Alignment = std::max(
Alignment,
getOrEnforceKnownAlignment(getLoadStorePointerOperand(C[0].Inst),
MaybeAlign(), DL, C[0].Inst, nullptr, &DT));
}

// All elements of the chain must have the same scalar-type size.
#ifndef NDEBUG
Expand Down Expand Up @@ -1634,3 +1638,32 @@ std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
.sextOrTrunc(OrigBitWidth);
return std::nullopt;
}

/// Raises the alignment recorded on the loads/stores of chain \p C wherever a
/// stronger alignment can be derived from another element of the same chain.
///
/// Each element carries its OffsetFromLeader, so the distance between any two
/// elements of the chain is known. An element that lies Delta away from an
/// element with alignment A is itself aligned to at least
/// commonAlignment(A, Delta). The instruction's alignment is only ever
/// increased, never lowered.
///
/// \param C the chain whose instructions are updated in place (the ChainElems
///          themselves are not modified). An empty chain is a no-op.
void Vectorizer::propagateBestAlignmentsInChain(ArrayRef<ChainElem> C) const {
  // Nothing to propagate. This also keeps the `*ChainIt.begin()` below from
  // dereferencing the end iterator of an empty range.
  if (C.empty())
    return;

  // Walks the range once, tracking the element with the highest alignment seen
  // so far and deriving an alignment for every later element from it.
  auto PropagateAlignments = [](auto ChainIt) {
    ChainElem BestAlignedElem = *ChainIt.begin();
    Align BestAlignSoFar = getLoadStoreAlignment(BestAlignedElem.Inst);

    for (const ChainElem &E : ChainIt) {
      Align OrigAlign = getLoadStoreAlignment(E.Inst);
      if (OrigAlign > BestAlignSoFar) {
        // E is better aligned than anything seen so far; it becomes the new
        // reference and needs no update itself.
        BestAlignedElem = E;
        BestAlignSoFar = OrigAlign;
        continue;
      }

      // Absolute distance between E and the current reference element.
      APInt DeltaFromBestAlignedElem =
          APIntOps::abdu(E.OffsetFromLeader, BestAlignedElem.OffsetFromLeader);
      // commonAlignment is equivalent to a greatest common power-of-two
      // divisor; it returns the largest power of 2 that divides both A and B.
      // getLimitedValue() converts the APInt delta to the integer offset that
      // commonAlignment takes.
      Align NewAlign = commonAlignment(
          BestAlignSoFar, DeltaFromBestAlignedElem.getLimitedValue());
      // Only ever strengthen the alignment already on the instruction.
      if (NewAlign > OrigAlign)
        setLoadStoreAlignment(E.Inst, NewAlign);
    }
  };

  // A single pass only carries alignment from earlier elements to later ones,
  // so run it in both directions: forwards, then backwards.
  PropagateAlignments(C);
  PropagateAlignments(reverse(C));
}
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ define void @variadics1(ptr %vlist) {
; CHECK-NEXT: [[ARGP_NEXT12:%.*]] = getelementptr i8, ptr [[ARGP_CUR11_ALIGNED]], i64 8
; CHECK-NEXT: [[X2:%.*]] = getelementptr i8, ptr [[ARGP_NEXT12]], i32 7
; CHECK-NEXT: [[ARGP_CUR16_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[X2]], i64 0)
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARGP_CUR16_ALIGNED]], align 4294967296
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARGP_CUR16_ALIGNED]], align 8
; CHECK-NEXT: [[X31:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
; CHECK-NEXT: [[X42:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
; CHECK-NEXT: [[X5:%.*]] = fadd double [[X42]], [[X31]]
Expand Down
Loading
Loading