Skip to content

Commit fbdf8ab

Browse files
authored
[LSV] Merge contiguous chains across scalar types (llvm#154069)
This change enables the LoadStoreVectorizer to merge and vectorize contiguous chains even when their scalar element types differ, as long as the total bitwidth matches. To do so, we rebase offsets between chains, normalize value types to a common integer type, and insert the necessary casts around loads and stores. This uncovers more vectorization opportunities and explains the expected codegen updates across AMDGPU tests.

Key changes:
- Chain merging
  - Build contiguous subchains and then merge adjacent ones when:
    - They refer to the same underlying pointer object and address space.
    - They are either all loads or all stores.
    - A constant leader-to-leader delta exists.
    - Rebasing one chain into the other's coordinate space does not overlap.
    - All elements have equal total bit width.
  - Rebase the second chain by the computed delta and append it to the first.
- Type normalization and casting
  - Normalize merged chains to a common integer type sized to the total bits.
  - For loads: create a new load of the normalized type, copy metadata, and cast back to the original type for uses if needed.
  - For stores: bitcast the value to the normalized type and store that.
  - Insert zext/trunc for integer size changes; use bit-or-pointer casts when sizes match.
- Cleanups
  - Erase replaced instructions and DCE pointer operands when safe.
- New helpers: computeLeaderDelta, chainsOverlapAfterRebase, rebaseChain, normalizeChainToType, and allElemsMatchTotalBits.

Impact:
- Increases vectorization opportunities across mixed-typed but size-compatible access chains.
- Large set of expected AMDGPU codegen diffs due to more/changed vectorization.

This PR resolves llvm#97715.
1 parent 039f883 commit fbdf8ab

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+3979
-2766
lines changed

llvm/include/llvm/Transforms/Utils/Local.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,7 @@ LLVM_ABI void combineAAMetadata(Instruction *K, const Instruction *J);
431431

432432
/// Copy the metadata from the source instruction to the destination (the
433433
/// replacement for the source instruction).
434-
LLVM_ABI void copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source);
434+
LLVM_ABI void copyMetadataForAccess(Instruction &Dest, Instruction &Source);
435435

436436
/// Patch the replacement so that it is not more restrictive than the value
437437
/// being replaced. It assumes that the replacement does not get moved from

llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1035,7 +1035,7 @@ bool LegalizeBufferContentTypesVisitor::visitLoadImpl(
10351035
LoadInst *NewLI = IRB.CreateAlignedLoad(
10361036
LoadableType, NewPtr, commonAlignment(OrigLI.getAlign(), ByteOffset),
10371037
Name + ".off." + Twine(ByteOffset));
1038-
copyMetadataForLoad(*NewLI, OrigLI);
1038+
copyMetadataForAccess(*NewLI, OrigLI);
10391039
NewLI->setAAMetadata(
10401040
AANodes.adjustForAccess(ByteOffset, LoadableType, DL));
10411041
NewLI->setAtomic(OrigLI.getOrdering(), OrigLI.getSyncScopeID());

llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,7 @@ void PointerReplacer::replace(Instruction *I) {
415415
LT->getAlign(), LT->getOrdering(),
416416
LT->getSyncScopeID());
417417
NewI->takeName(LT);
418-
copyMetadataForLoad(*NewI, *LT);
418+
copyMetadataForAccess(*NewI, *LT);
419419

420420
IC.InsertNewInstWith(NewI, LT->getIterator());
421421
IC.replaceInstUsesWith(*LT, NewI);
@@ -606,7 +606,7 @@ LoadInst *InstCombinerImpl::combineLoadToNewType(LoadInst &LI, Type *NewTy,
606606
Builder.CreateAlignedLoad(NewTy, LI.getPointerOperand(), LI.getAlign(),
607607
LI.isVolatile(), LI.getName() + Suffix);
608608
NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
609-
copyMetadataForLoad(*NewLoad, LI);
609+
copyMetadataForAccess(*NewLoad, LI);
610610
return NewLoad;
611611
}
612612

llvm/lib/Transforms/Scalar/SROA.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3280,7 +3280,7 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
32803280
// Copy any metadata that is valid for the new load. This may require
32813281
// conversion to a different kind of metadata, e.g. !nonnull might change
32823282
// to !range or vice versa.
3283-
copyMetadataForLoad(*NewLI, LI);
3283+
copyMetadataForAccess(*NewLI, LI);
32843284

32853285
// Do this after copyMetadataForAccess() to preserve the TBAA shift.
32863286
if (AATags)

llvm/lib/Transforms/Utils/Local.cpp

Lines changed: 38 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3100,54 +3100,70 @@ void llvm::combineAAMetadata(Instruction *K, const Instruction *J) {
31003100
combineMetadata(K, J, /*DoesKMove=*/true, /*AAOnly=*/true);
31013101
}
31023102

3103-
void llvm::copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source) {
3103+
void llvm::copyMetadataForAccess(Instruction &DestI, Instruction &SourceI) {
31043104
SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
3105-
Source.getAllMetadata(MD);
3106-
MDBuilder MDB(Dest.getContext());
3107-
Type *NewType = Dest.getType();
3108-
const DataLayout &DL = Source.getDataLayout();
3105+
SourceI.getAllMetadata(MD);
3106+
MDBuilder MDB(DestI.getContext());
3107+
Type *NewType = DestI.getType();
3108+
3109+
// Only needed for range metadata on loads.
3110+
const DataLayout *DL = nullptr;
3111+
const LoadInst *LSource = dyn_cast<LoadInst>(&SourceI);
3112+
if (LSource)
3113+
DL = &LSource->getDataLayout();
3114+
31093115
for (const auto &MDPair : MD) {
31103116
unsigned ID = MDPair.first;
31113117
MDNode *N = MDPair.second;
3112-
// Note, essentially every kind of metadata should be preserved here! This
3113-
// routine is supposed to clone a load instruction changing *only its type*.
3114-
// The only metadata it makes sense to drop is metadata which is invalidated
3115-
// when the pointer type changes. This should essentially never be the case
3116-
// in LLVM, but we explicitly switch over only known metadata to be
3117-
// conservatively correct. If you are adding metadata to LLVM which pertains
3118-
// to loads, you almost certainly want to add it here.
3118+
31193119
switch (ID) {
3120+
// Applies to both loads and stores as-is.
31203121
case LLVMContext::MD_dbg:
3121-
case LLVMContext::MD_tbaa:
31223122
case LLVMContext::MD_prof:
3123-
case LLVMContext::MD_fpmath:
31243123
case LLVMContext::MD_tbaa_struct:
3125-
case LLVMContext::MD_invariant_load:
31263124
case LLVMContext::MD_alias_scope:
31273125
case LLVMContext::MD_noalias:
31283126
case LLVMContext::MD_nontemporal:
3129-
case LLVMContext::MD_mem_parallel_loop_access:
31303127
case LLVMContext::MD_access_group:
31313128
case LLVMContext::MD_noundef:
31323129
case LLVMContext::MD_noalias_addrspace:
3133-
// All of these directly apply.
3134-
Dest.setMetadata(ID, N);
3130+
case LLVMContext::MD_mem_parallel_loop_access:
3131+
DestI.setMetadata(ID, N);
3132+
break;
3133+
3134+
// Load-only metadata.
3135+
case LLVMContext::MD_fpmath:
3136+
case LLVMContext::MD_invariant_load:
3137+
if (isa<LoadInst>(DestI))
3138+
DestI.setMetadata(ID, N);
31353139
break;
31363140

31373141
case LLVMContext::MD_nonnull:
3138-
copyNonnullMetadata(Source, N, Dest);
3142+
if (auto *LDest = dyn_cast<LoadInst>(&DestI)) {
3143+
if (LSource)
3144+
copyNonnullMetadata(*LSource, N, *LDest);
3145+
}
31393146
break;
31403147

31413148
case LLVMContext::MD_align:
31423149
case LLVMContext::MD_dereferenceable:
31433150
case LLVMContext::MD_dereferenceable_or_null:
3144-
// These only directly apply if the new type is also a pointer.
3151+
// Applies to both loads and stores only if the new type is also a
3152+
// pointer.
31453153
if (NewType->isPointerTy())
3146-
Dest.setMetadata(ID, N);
3154+
DestI.setMetadata(ID, N);
31473155
break;
31483156

31493157
case LLVMContext::MD_range:
3150-
copyRangeMetadata(DL, Source, N, Dest);
3158+
if (auto *LDest = dyn_cast<LoadInst>(&DestI)) {
3159+
if (LSource && DL)
3160+
copyRangeMetadata(*DL, *LSource, N, *LDest);
3161+
}
3162+
break;
3163+
3164+
case LLVMContext::MD_tbaa:
3165+
if (isa<LoadInst>(DestI))
3166+
DestI.setMetadata(ID, N);
31513167
break;
31523168
}
31533169
}

0 commit comments

Comments
 (0)