@@ -450,31 +450,14 @@ Type *GCNTTIImpl::getMemcpyLoopLoweringType(
450450 // multiple accesses, effectively unrolling the memcpy loop. Private memory
451451 // also hits this, although accesses may be decomposed.
452452 //
453- // Don't unroll if
454- // - Length is not a constant, since unrolling leads to worse performance for
455- // length values that are smaller or slightly larger than the total size of
456- // the type returned here. Mitigating that would require a more complex
457- // lowering for variable-length memcpy and memmove.
458- // - the memory operations would be split further into byte-wise accesses
459- // because of their (mis)alignment, since that would lead to a huge code
460- // size increase.
453+ // Don't unroll if Length is not a constant, since unrolling leads to worse
454+ // performance for length values that are smaller or slightly larger than the
455+ // total size of the type returned here. Mitigating that would require a more
456+ // complex lowering for variable-length memcpy and memmove.
461457 unsigned I32EltsInVector = 4 ;
462- if (MemcpyLoopUnroll > 0 && isa<ConstantInt>(Length)) {
463- unsigned VectorSizeBytes = I32EltsInVector * 4 ;
464- unsigned VectorSizeBits = VectorSizeBytes * 8 ;
465- unsigned UnrolledVectorBytes = VectorSizeBytes * MemcpyLoopUnroll;
466- Align PartSrcAlign (commonAlignment (SrcAlign, UnrolledVectorBytes));
467- Align PartDestAlign (commonAlignment (DestAlign, UnrolledVectorBytes));
468-
469- const SITargetLowering *TLI = this ->getTLI ();
470- bool SrcNotSplit = TLI->allowsMisalignedMemoryAccessesImpl (
471- VectorSizeBits, SrcAddrSpace, PartSrcAlign);
472- bool DestNotSplit = TLI->allowsMisalignedMemoryAccessesImpl (
473- VectorSizeBits, DestAddrSpace, PartDestAlign);
474- if (SrcNotSplit && DestNotSplit)
475- return FixedVectorType::get (Type::getInt32Ty (Context),
476- MemcpyLoopUnroll * I32EltsInVector);
477- }
458+ if (MemcpyLoopUnroll > 0 && isa<ConstantInt>(Length))
459+ return FixedVectorType::get (Type::getInt32Ty (Context),
460+ MemcpyLoopUnroll * I32EltsInVector);
478461
479462 return FixedVectorType::get (Type::getInt32Ty (Context), I32EltsInVector);
480463}
0 commit comments