Skip to content

Commit eae6914

Browse files
committed
fixup! [AMDGPU] Use wider loop lowering type for LowerMemIntrinsics
Remove special case handling for split unaligned accesses.
1 parent: aa9a039; commit: eae6914

File tree

4 files changed

+13798
-1038
lines changed

4 files changed

+13798
-1038
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 7 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -450,31 +450,14 @@ Type *GCNTTIImpl::getMemcpyLoopLoweringType(
450450
// multiple accesses, effectively unrolling the memcpy loop. Private memory
451451
// also hits this, although accesses may be decomposed.
452452
//
453-
// Don't unroll if
454-
// - Length is not a constant, since unrolling leads to worse performance for
455-
// length values that are smaller or slightly larger than the total size of
456-
// the type returned here. Mitigating that would require a more complex
457-
// lowering for variable-length memcpy and memmove.
458-
// - the memory operations would be split further into byte-wise accesses
459-
// because of their (mis)alignment, since that would lead to a huge code
460-
// size increase.
453+
// Don't unroll if Length is not a constant, since unrolling leads to worse
454+
// performance for length values that are smaller or slightly larger than the
455+
// total size of the type returned here. Mitigating that would require a more
456+
// complex lowering for variable-length memcpy and memmove.
461457
unsigned I32EltsInVector = 4;
462-
if (MemcpyLoopUnroll > 0 && isa<ConstantInt>(Length)) {
463-
unsigned VectorSizeBytes = I32EltsInVector * 4;
464-
unsigned VectorSizeBits = VectorSizeBytes * 8;
465-
unsigned UnrolledVectorBytes = VectorSizeBytes * MemcpyLoopUnroll;
466-
Align PartSrcAlign(commonAlignment(SrcAlign, UnrolledVectorBytes));
467-
Align PartDestAlign(commonAlignment(DestAlign, UnrolledVectorBytes));
468-
469-
const SITargetLowering *TLI = this->getTLI();
470-
bool SrcNotSplit = TLI->allowsMisalignedMemoryAccessesImpl(
471-
VectorSizeBits, SrcAddrSpace, PartSrcAlign);
472-
bool DestNotSplit = TLI->allowsMisalignedMemoryAccessesImpl(
473-
VectorSizeBits, DestAddrSpace, PartDestAlign);
474-
if (SrcNotSplit && DestNotSplit)
475-
return FixedVectorType::get(Type::getInt32Ty(Context),
476-
MemcpyLoopUnroll * I32EltsInVector);
477-
}
458+
if (MemcpyLoopUnroll > 0 && isa<ConstantInt>(Length))
459+
return FixedVectorType::get(Type::getInt32Ty(Context),
460+
MemcpyLoopUnroll * I32EltsInVector);
478461

479462
return FixedVectorType::get(Type::getInt32Ty(Context), I32EltsInVector);
480463
}

0 commit comments

Comments
 (0)