4545//
4646// This pass proceeds in three main phases:
4747//
48- // ## Rewriting loads and stores of p7 and memcpy()-like handling
48+ // ## Rewriting loads and stores of p7
4949//
5050// The first phase is to rewrite away all loads and stors of `ptr addrspace(7)`,
5151// including aggregates containing such pointers, to ones that use `i160`. This
52- // is handled by `StoreFatPtrsAsIntsAndExpandMemcpyVisitor ` , which visits
53- // loads, stores, and allocas and, if the loaded or stored type contains `ptr
54- // addrspace(7)`, rewrites that type to one where the p7s are replaced by i160s,
55- // copying other parts of aggregates as needed. In the case of a store, each
56- // pointer is `ptrtoint`d to i160 before storing, and load integers are
57- // `inttoptr`d back. This same transformation is applied to vectors of pointers.
52+ // is handled by `StoreFatPtrsAsIntsVisitor ` , which visits loads, stores, and
53+ // allocas and, if the loaded or stored type contains `ptr addrspace(7)`,
54+ // rewrites that type to one where the p7s are replaced by i160s, copying other
55+ // parts of aggregates as needed. In the case of a store, each pointer is
56+ // `ptrtoint`d to i160 before storing, and load integers are `inttoptr`d back.
57+ // This same transformation is applied to vectors of pointers.
5858//
5959// Such a transformation allows the later phases of the pass to not need
6060// to handle buffer fat pointers moving to and from memory, where we load
6666// Atomics operations on `ptr addrspace(7)` values are not suppported, as the
6767// hardware does not include a 160-bit atomic.
6868//
69- // In order to save on O(N) work and to ensure that the contents type
70- // legalizer correctly splits up wide loads, also unconditionally lower
71- // memcpy-like intrinsics into loops here.
72- //
7369// ## Buffer contents type legalization
7470//
7571// The underlying buffer intrinsics only support types up to 128 bits long,
235231#include " llvm/IR/InstIterator.h"
236232#include " llvm/IR/InstVisitor.h"
237233#include " llvm/IR/Instructions.h"
238- #include " llvm/IR/IntrinsicInst.h"
239234#include " llvm/IR/Intrinsics.h"
240235#include " llvm/IR/IntrinsicsAMDGPU.h"
241236#include " llvm/IR/Metadata.h"
242237#include " llvm/IR/Operator.h"
243238#include " llvm/IR/PatternMatch.h"
244239#include " llvm/IR/ReplaceConstant.h"
245- #include " llvm/IR/ValueHandle.h"
246240#include " llvm/InitializePasses.h"
247241#include " llvm/Pass.h"
248- #include " llvm/Support/AMDGPUAddrSpace.h"
249242#include " llvm/Support/Alignment.h"
250243#include " llvm/Support/AtomicOrdering.h"
251244#include " llvm/Support/Debug.h"
252245#include " llvm/Support/ErrorHandling.h"
253246#include " llvm/Transforms/Utils/Cloning.h"
254247#include " llvm/Transforms/Utils/Local.h"
255- #include " llvm/Transforms/Utils/LowerMemIntrinsics.h"
256248#include " llvm/Transforms/Utils/ValueMapper.h"
257249
258250#define DEBUG_TYPE " amdgpu-lower-buffer-fat-pointers"
@@ -439,16 +431,14 @@ namespace {
439431// / marshalling costs when reading or storing these values, but since placing
440432// / such pointers into memory is an uncommon operation at best, we feel that
441433// / this cost is acceptable for better performance in the common case.
442- class StoreFatPtrsAsIntsAndExpandMemcpyVisitor
443- : public InstVisitor<StoreFatPtrsAsIntsAndExpandMemcpyVisitor , bool > {
434+ class StoreFatPtrsAsIntsVisitor
435+ : public InstVisitor<StoreFatPtrsAsIntsVisitor , bool > {
444436 BufferFatPtrToIntTypeMap *TypeMap;
445437
446438 ValueToValueMapTy ConvertedForStore;
447439
448440 IRBuilder<> IRB;
449441
450- const TargetMachine *TM;
451-
452442 // Convert all the buffer fat pointers within the input value to inttegers
453443 // so that it can be stored in memory.
454444 Value *fatPtrsToInts (Value *V, Type *From, Type *To, const Twine &Name);
@@ -458,27 +448,20 @@ class StoreFatPtrsAsIntsAndExpandMemcpyVisitor
458448 Value *intsToFatPtrs (Value *V, Type *From, Type *To, const Twine &Name);
459449
460450public:
461- StoreFatPtrsAsIntsAndExpandMemcpyVisitor (BufferFatPtrToIntTypeMap *TypeMap,
462- LLVMContext &Ctx,
463- const TargetMachine *TM)
464- : TypeMap(TypeMap), IRB(Ctx), TM(TM) {}
451+ StoreFatPtrsAsIntsVisitor (BufferFatPtrToIntTypeMap *TypeMap, LLVMContext &Ctx)
452+ : TypeMap(TypeMap), IRB(Ctx) {}
465453 bool processFunction (Function &F);
466454
467455 bool visitInstruction (Instruction &I) { return false ; }
468456 bool visitAllocaInst (AllocaInst &I);
469457 bool visitLoadInst (LoadInst &LI);
470458 bool visitStoreInst (StoreInst &SI);
471459 bool visitGetElementPtrInst (GetElementPtrInst &I);
472-
473- bool visitMemCpyInst (MemCpyInst &MCI);
474- bool visitMemMoveInst (MemMoveInst &MMI);
475- bool visitMemSetInst (MemSetInst &MSI);
476- bool visitMemSetPatternInst (MemSetPatternInst &MSPI);
477460};
478461} // namespace
479462
480- Value *StoreFatPtrsAsIntsAndExpandMemcpyVisitor ::fatPtrsToInts (
481- Value *V, Type *From, Type *To, const Twine &Name) {
463+ Value *StoreFatPtrsAsIntsVisitor ::fatPtrsToInts (Value *V, Type *From, Type *To,
464+ const Twine &Name) {
482465 if (From == To)
483466 return V;
484467 ValueToValueMapTy::iterator Find = ConvertedForStore.find (V);
@@ -515,8 +498,8 @@ Value *StoreFatPtrsAsIntsAndExpandMemcpyVisitor::fatPtrsToInts(
515498 return Ret;
516499}
517500
518- Value *StoreFatPtrsAsIntsAndExpandMemcpyVisitor ::intsToFatPtrs (
519- Value *V, Type *From, Type *To, const Twine &Name) {
501+ Value *StoreFatPtrsAsIntsVisitor ::intsToFatPtrs (Value *V, Type *From, Type *To,
502+ const Twine &Name) {
520503 if (From == To)
521504 return V;
522505 if (isBufferFatPtrOrVector (To)) {
@@ -548,25 +531,18 @@ Value *StoreFatPtrsAsIntsAndExpandMemcpyVisitor::intsToFatPtrs(
548531 return Ret;
549532}
550533
551- bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor ::processFunction (Function &F) {
534+ bool StoreFatPtrsAsIntsVisitor ::processFunction (Function &F) {
552535 bool Changed = false ;
553- // Process memcpy-like instructions after the main iteration because they can
554- // invalidate iterators.
555- SmallVector<WeakTrackingVH> CanBecomeLoops;
536+ // The visitors will mutate GEPs and allocas, but will push loads and stores
537+ // to the worklist to avoid invalidation.
556538 for (Instruction &I : make_early_inc_range (instructions (F))) {
557- if (isa<MemTransferInst, MemSetInst, MemSetPatternInst>(I))
558- CanBecomeLoops.push_back (&I);
559- else
560- Changed |= visit (I);
561- }
562- for (WeakTrackingVH VH : make_early_inc_range (CanBecomeLoops)) {
563- Changed |= visit (cast<Instruction>(VH));
539+ Changed |= visit (I);
564540 }
565541 ConvertedForStore.clear ();
566542 return Changed;
567543}
568544
569- bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor ::visitAllocaInst (AllocaInst &I) {
545+ bool StoreFatPtrsAsIntsVisitor ::visitAllocaInst (AllocaInst &I) {
570546 Type *Ty = I.getAllocatedType ();
571547 Type *NewTy = TypeMap->remapType (Ty);
572548 if (Ty == NewTy)
@@ -575,8 +551,7 @@ bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitAllocaInst(AllocaInst &I) {
575551 return true ;
576552}
577553
578- bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitGetElementPtrInst (
579- GetElementPtrInst &I) {
554+ bool StoreFatPtrsAsIntsVisitor::visitGetElementPtrInst (GetElementPtrInst &I) {
580555 Type *Ty = I.getSourceElementType ();
581556 Type *NewTy = TypeMap->remapType (Ty);
582557 if (Ty == NewTy)
@@ -588,7 +563,7 @@ bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitGetElementPtrInst(
588563 return true ;
589564}
590565
591- bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor ::visitLoadInst (LoadInst &LI) {
566+ bool StoreFatPtrsAsIntsVisitor ::visitLoadInst (LoadInst &LI) {
592567 Type *Ty = LI.getType ();
593568 Type *IntTy = TypeMap->remapType (Ty);
594569 if (Ty == IntTy)
@@ -606,7 +581,7 @@ bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitLoadInst(LoadInst &LI) {
606581 return true ;
607582}
608583
609- bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor ::visitStoreInst (StoreInst &SI) {
584+ bool StoreFatPtrsAsIntsVisitor ::visitStoreInst (StoreInst &SI) {
610585 Value *V = SI.getValueOperand ();
611586 Type *Ty = V->getType ();
612587 Type *IntTy = TypeMap->remapType (Ty);
@@ -622,47 +597,6 @@ bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitStoreInst(StoreInst &SI) {
622597 return true ;
623598}
624599
625- bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemCpyInst (
626- MemCpyInst &MCI) {
627- // TODO: Allow memcpy.p7.p3 as a synonym for the direct-to-LDS copy, which'll
628- // need loop expansion here.
629- if (MCI.getSourceAddressSpace () != AMDGPUAS::BUFFER_FAT_POINTER &&
630- MCI.getDestAddressSpace () != AMDGPUAS::BUFFER_FAT_POINTER)
631- return false ;
632- llvm::expandMemCpyAsLoop (&MCI,
633- TM->getTargetTransformInfo (*MCI.getFunction ()));
634- MCI.eraseFromParent ();
635- return true ;
636- }
637-
638- bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemMoveInst (
639- MemMoveInst &MMI) {
640- if (MMI.getSourceAddressSpace () != AMDGPUAS::BUFFER_FAT_POINTER &&
641- MMI.getDestAddressSpace () != AMDGPUAS::BUFFER_FAT_POINTER)
642- return false ;
643- report_fatal_error (
644- " memmove() on buffer descriptors is not implemented because pointer "
645- " comparison on buffer descriptors isn't implemented\n " );
646- }
647-
648- bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemSetInst (
649- MemSetInst &MSI) {
650- if (MSI.getDestAddressSpace () != AMDGPUAS::BUFFER_FAT_POINTER)
651- return false ;
652- llvm::expandMemSetAsLoop (&MSI);
653- MSI.eraseFromParent ();
654- return true ;
655- }
656-
657- bool StoreFatPtrsAsIntsAndExpandMemcpyVisitor::visitMemSetPatternInst (
658- MemSetPatternInst &MSPI) {
659- if (MSPI.getDestAddressSpace () != AMDGPUAS::BUFFER_FAT_POINTER)
660- return false ;
661- llvm::expandMemSetPatternAsLoop (&MSPI);
662- MSPI.eraseFromParent ();
663- return true ;
664- }
665-
666600namespace {
667601// / Convert loads/stores of types that the buffer intrinsics can't handle into
668602// / one ore more such loads/stores that consist of legal types.
@@ -1193,7 +1127,6 @@ bool LegalizeBufferContentTypesVisitor::visitStoreInst(StoreInst &SI) {
11931127
11941128bool LegalizeBufferContentTypesVisitor::processFunction (Function &F) {
11951129 bool Changed = false ;
1196- // Note, memory transfer intrinsics won't
11971130 for (Instruction &I : make_early_inc_range (instructions (F))) {
11981131 Changed |= visit (I);
11991132 }
@@ -2151,12 +2084,6 @@ static bool isRemovablePointerIntrinsic(Intrinsic::ID IID) {
21512084 case Intrinsic::invariant_end:
21522085 case Intrinsic::launder_invariant_group:
21532086 case Intrinsic::strip_invariant_group:
2154- case Intrinsic::memcpy:
2155- case Intrinsic::memcpy_inline:
2156- case Intrinsic::memmove:
2157- case Intrinsic::memset:
2158- case Intrinsic::memset_inline:
2159- case Intrinsic::experimental_memset_pattern:
21602087 return true ;
21612088 }
21622089}
@@ -2426,8 +2353,7 @@ bool AMDGPULowerBufferFatPointers::run(Module &M, const TargetMachine &TM) {
24262353 /* RemoveDeadConstants=*/ false , /* IncludeSelf=*/ true );
24272354 }
24282355
2429- StoreFatPtrsAsIntsAndExpandMemcpyVisitor MemOpsRewrite (&IntTM, M.getContext (),
2430- &TM);
2356+ StoreFatPtrsAsIntsVisitor MemOpsRewrite (&IntTM, M.getContext ());
24312357 LegalizeBufferContentTypesVisitor BufferContentsTypeRewrite (DL,
24322358 M.getContext ());
24332359 for (Function &F : M.functions ()) {
0 commit comments