@@ -265,18 +265,9 @@ static void convertToParamAS(Use *OldUse, Value *Param, bool HasCvtaParam,
265265 if (HasCvtaParam) {
266266 auto GetParamAddrCastToGeneric =
267267 [](Value *Addr, Instruction *OriginalUser) -> Value * {
268- PointerType *ReturnTy =
269- PointerType::get (OriginalUser->getContext (), ADDRESS_SPACE_GENERIC);
270- Function *CvtToGen = Intrinsic::getOrInsertDeclaration (
271- OriginalUser->getModule (), Intrinsic::nvvm_ptr_param_to_gen,
272- {ReturnTy, PointerType::get (OriginalUser->getContext (),
273- ADDRESS_SPACE_PARAM)});
274-
275- // Cast param address to generic address space
276- Value *CvtToGenCall =
277- CallInst::Create (CvtToGen, Addr, Addr->getName () + " .gen" ,
278- OriginalUser->getIterator ());
279- return CvtToGenCall;
268+ IRBuilder<> IRB (OriginalUser);
269+ Type *GenTy = IRB.getPtrTy (ADDRESS_SPACE_GENERIC);
270+ return IRB.CreateAddrSpaceCast (Addr, GenTy, Addr->getName () + " .gen" );
280271 };
281272 auto *ParamInGenericAS =
282273 GetParamAddrCastToGeneric (I.NewParam , I.OldInstruction );
@@ -515,33 +506,34 @@ void copyByValParam(Function &F, Argument &Arg) {
515506 BasicBlock::iterator FirstInst = F.getEntryBlock ().begin ();
516507 Type *StructType = Arg.getParamByValType ();
517508 const DataLayout &DL = F.getDataLayout ();
518- AllocaInst *AllocA = new AllocaInst (StructType, DL. getAllocaAddrSpace (),
519- Arg.getName (), FirstInst );
509+ IRBuilder<> IRB (&*FirstInst);
510+ AllocaInst *AllocA = IRB. CreateAlloca (StructType, nullptr , Arg.getName ());
520511 // Set the alignment to alignment of the byval parameter. This is because,
521512 // later load/stores assume that alignment, and we are going to replace
522513 // the use of the byval parameter with this alloca instruction.
523- AllocA->setAlignment (F. getParamAlign (Arg. getArgNo ())
524- .value_or (DL.getPrefTypeAlign (StructType)));
514+ AllocA->setAlignment (
515+ Arg. getParamAlign () .value_or (DL.getPrefTypeAlign (StructType)));
525516 Arg.replaceAllUsesWith (AllocA);
526517
527- Value *ArgInParam = new AddrSpaceCastInst (
528- &Arg, PointerType::get (Arg.getContext (), ADDRESS_SPACE_PARAM),
529- Arg.getName (), FirstInst);
518+ Value *ArgInParam =
519+ IRB.CreateIntrinsic (Intrinsic::nvvm_internal_addrspace_wrap,
520+ {IRB.getPtrTy (ADDRESS_SPACE_PARAM), Arg.getType ()},
521+ &Arg, {}, Arg.getName ());
522+
530523 // Be sure to propagate alignment to this load; LLVM doesn't know that NVPTX
531524 // addrspacecast preserves alignment. Since params are constant, this load
532525 // is definitely not volatile.
533526 const auto ArgSize = *AllocA->getAllocationSize (DL);
534- IRBuilder<> IRB (&*FirstInst);
535527 IRB.CreateMemCpy (AllocA, AllocA->getAlign (), ArgInParam, AllocA->getAlign (),
536528 ArgSize);
537529}
538530} // namespace
539531
540532static void handleByValParam (const NVPTXTargetMachine &TM, Argument *Arg) {
541533 Function *Func = Arg->getParent ();
542- bool HasCvtaParam =
543- TM.getSubtargetImpl (*Func)->hasCvtaParam () && isKernelFunction (*Func );
544- bool IsGridConstant = HasCvtaParam && isParamGridConstant (*Arg);
534+ assert ( isKernelFunction (*Func));
535+ const bool HasCvtaParam = TM.getSubtargetImpl (*Func)->hasCvtaParam ();
536+ const bool IsGridConstant = HasCvtaParam && isParamGridConstant (*Arg);
545537 const DataLayout &DL = Func->getDataLayout ();
546538 BasicBlock::iterator FirstInst = Func->getEntryBlock ().begin ();
547539 Type *StructType = Arg->getParamByValType ();
@@ -556,9 +548,11 @@ static void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg) {
556548 // skip creation of a local copy of the argument.
557549 SmallVector<Use *, 16 > UsesToUpdate (llvm::make_pointer_range (Arg->uses ()));
558550
559- Value *ArgInParamAS = new AddrSpaceCastInst (
560- Arg, PointerType::get (StructType->getContext (), ADDRESS_SPACE_PARAM),
561- Arg->getName (), FirstInst);
551+ IRBuilder<> IRB (&*FirstInst);
552+ Value *ArgInParamAS = IRB.CreateIntrinsic (
553+ Intrinsic::nvvm_internal_addrspace_wrap,
554+ {IRB.getPtrTy (ADDRESS_SPACE_PARAM), Arg->getType ()}, {Arg});
555+
562556 for (Use *U : UsesToUpdate)
563557 convertToParamAS (U, ArgInParamAS, HasCvtaParam, IsGridConstant);
564558 LLVM_DEBUG (dbgs () << " No need to copy or cast " << *Arg << " \n " );
@@ -576,30 +570,31 @@ static void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg) {
576570 // However, we're still not allowed to write to it. If the user specified
577571 // `__grid_constant__` for the argument, we'll consider escaped pointer as
578572 // read-only.
579- if (HasCvtaParam && ( ArgUseIsReadOnly || IsGridConstant )) {
573+ if (IsGridConstant || ( HasCvtaParam && ArgUseIsReadOnly)) {
580574 LLVM_DEBUG (dbgs () << " Using non-copy pointer to " << *Arg << " \n " );
581575 // Replace all argument pointer uses (which might include a device function
582576 // call) with a cast to the generic address space using cvta.param
583577 // instruction, which avoids a local copy.
584578 IRBuilder<> IRB (&Func->getEntryBlock ().front ());
585579
586- // Cast argument to param address space
587- auto *CastToParam = cast<AddrSpaceCastInst>(IRB.CreateAddrSpaceCast (
588- Arg, IRB.getPtrTy (ADDRESS_SPACE_PARAM), Arg->getName () + " .param" ));
580+ // Cast argument to param address space. Because the backend will emit the
581+ // argument already in the param address space, we need to use the noop
582+ // intrinsic; this has the added benefit of preventing other optimizations
583+ // from folding away this pair of addrspacecasts.
584+ auto *ParamSpaceArg =
585+ IRB.CreateIntrinsic (Intrinsic::nvvm_internal_addrspace_wrap,
586+ {IRB.getPtrTy (ADDRESS_SPACE_PARAM), Arg->getType ()},
587+ Arg, {}, Arg->getName () + " .param" );
589588
590- // Cast param address to generic address space. We do not use an
591- // addrspacecast to generic here, because, LLVM considers `Arg` to be in the
592- // generic address space, and a `generic -> param` cast followed by a `param
593- // -> generic` cast will be folded away. The `param -> generic` intrinsic
594- // will be correctly lowered to `cvta.param`.
595- Value *CvtToGenCall = IRB.CreateIntrinsic (
596- IRB.getPtrTy (ADDRESS_SPACE_GENERIC), Intrinsic::nvvm_ptr_param_to_gen,
597- CastToParam, nullptr , CastToParam->getName () + " .gen" );
589+ // Cast param address to generic address space.
590+ Value *GenericArg = IRB.CreateAddrSpaceCast (
591+ ParamSpaceArg, IRB.getPtrTy (ADDRESS_SPACE_GENERIC),
592+ Arg->getName () + " .gen" );
598593
599- Arg->replaceAllUsesWith (CvtToGenCall );
594+ Arg->replaceAllUsesWith (GenericArg );
600595
601596 // Do not replace Arg in the cast to param space
602- CastToParam ->setOperand (0 , Arg);
597+ ParamSpaceArg ->setOperand (0 , Arg);
603598 } else
604599 copyByValParam (*Func, *Arg);
605600}
@@ -713,12 +708,14 @@ static bool copyFunctionByValArgs(Function &F) {
713708 LLVM_DEBUG (dbgs () << " Creating a copy of byval args of " << F.getName ()
714709 << " \n " );
715710 bool Changed = false ;
716- for (Argument &Arg : F.args ())
717- if (Arg.getType ()->isPointerTy () && Arg.hasByValAttr () &&
718- !(isParamGridConstant (Arg) && isKernelFunction (F))) {
719- copyByValParam (F, Arg);
720- Changed = true ;
721- }
711+ if (isKernelFunction (F)) {
712+ for (Argument &Arg : F.args ())
713+ if (Arg.getType ()->isPointerTy () && Arg.hasByValAttr () &&
714+ !isParamGridConstant (Arg)) {
715+ copyByValParam (F, Arg);
716+ Changed = true ;
717+ }
718+ }
722719 return Changed;
723720}
724721
0 commit comments