@@ -265,18 +265,9 @@ static void convertToParamAS(Use *OldUse, Value *Param, bool HasCvtaParam,
265265 if (HasCvtaParam) {
266266 auto GetParamAddrCastToGeneric =
267267 [](Value *Addr, Instruction *OriginalUser) -> Value * {
268- PointerType *ReturnTy =
269- PointerType::get (OriginalUser->getContext (), ADDRESS_SPACE_GENERIC);
270- Function *CvtToGen = Intrinsic::getOrInsertDeclaration (
271- OriginalUser->getModule (), Intrinsic::nvvm_ptr_param_to_gen,
272- {ReturnTy, PointerType::get (OriginalUser->getContext (),
273- ADDRESS_SPACE_PARAM)});
274-
275- // Cast param address to generic address space
276- Value *CvtToGenCall =
277- CallInst::Create (CvtToGen, Addr, Addr->getName () + " .gen" ,
278- OriginalUser->getIterator ());
279- return CvtToGenCall;
268+ IRBuilder<> IRB (OriginalUser);
269+ Type *GenTy = IRB.getPtrTy (ADDRESS_SPACE_GENERIC);
270+ return IRB.CreateAddrSpaceCast (Addr, GenTy, Addr->getName () + " .gen" );
280271 };
281272 auto *ParamInGenericAS =
282273 GetParamAddrCastToGeneric (I.NewParam , I.OldInstruction );
@@ -515,33 +506,34 @@ void copyByValParam(Function &F, Argument &Arg) {
515506 BasicBlock::iterator FirstInst = F.getEntryBlock ().begin ();
516507 Type *StructType = Arg.getParamByValType ();
517508 const DataLayout &DL = F.getDataLayout ();
518- AllocaInst *AllocA = new AllocaInst (StructType, DL. getAllocaAddrSpace (),
519- Arg.getName (), FirstInst );
509+ IRBuilder<> IRB (&*FirstInst);
510+ AllocaInst *AllocA = IRB. CreateAlloca (StructType, nullptr , Arg.getName ());
520511 // Set the alignment to alignment of the byval parameter. This is because,
521512 // later load/stores assume that alignment, and we are going to replace
522513 // the use of the byval parameter with this alloca instruction.
523- AllocA->setAlignment (F. getParamAlign (Arg. getArgNo ())
524- .value_or (DL.getPrefTypeAlign (StructType)));
514+ AllocA->setAlignment (
515+ Arg. getParamAlign () .value_or (DL.getPrefTypeAlign (StructType)));
525516 Arg.replaceAllUsesWith (AllocA);
526517
527- Value *ArgInParam = new AddrSpaceCastInst (
528- &Arg, PointerType::get (Arg.getContext (), ADDRESS_SPACE_PARAM),
529- Arg.getName (), FirstInst);
518+ Value *ArgInParam =
519+ IRB.CreateIntrinsic (Intrinsic::nvvm_internal_noop_addrspacecast,
520+ {IRB.getPtrTy (ADDRESS_SPACE_PARAM), Arg.getType ()},
521+ &Arg, {}, Arg.getName ());
522+
530523 // Be sure to propagate alignment to this load; LLVM doesn't know that NVPTX
531524 // addrspacecast preserves alignment. Since params are constant, this load
532525 // is definitely not volatile.
533526 const auto ArgSize = *AllocA->getAllocationSize (DL);
534- IRBuilder<> IRB (&*FirstInst);
535527 IRB.CreateMemCpy (AllocA, AllocA->getAlign (), ArgInParam, AllocA->getAlign (),
536528 ArgSize);
537529}
538530} // namespace
539531
540532static void handleByValParam (const NVPTXTargetMachine &TM, Argument *Arg) {
541533 Function *Func = Arg->getParent ();
542- bool HasCvtaParam =
543- TM.getSubtargetImpl (*Func)->hasCvtaParam () && isKernelFunction (*Func );
544- bool IsGridConstant = HasCvtaParam && isParamGridConstant (*Arg);
534+ assert ( isKernelFunction (*Func));
535+ const bool HasCvtaParam = TM.getSubtargetImpl (*Func)->hasCvtaParam ();
536+ const bool IsGridConstant = HasCvtaParam && isParamGridConstant (*Arg);
545537 const DataLayout &DL = Func->getDataLayout ();
546538 BasicBlock::iterator FirstInst = Func->getEntryBlock ().begin ();
547539 Type *StructType = Arg->getParamByValType ();
@@ -558,9 +550,11 @@ static void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg) {
558550 for (Use &U : Arg->uses ())
559551 UsesToUpdate.push_back (&U);
560552
561- Value *ArgInParamAS = new AddrSpaceCastInst (
562- Arg, PointerType::get (StructType->getContext (), ADDRESS_SPACE_PARAM),
563- Arg->getName (), FirstInst);
553+ IRBuilder<> IRB (&*FirstInst);
554+ Value *ArgInParamAS = IRB.CreateIntrinsic (
555+ Intrinsic::nvvm_internal_noop_addrspacecast,
556+ {IRB.getPtrTy (ADDRESS_SPACE_PARAM), Arg->getType ()}, {Arg});
557+
564558 for (Use *U : UsesToUpdate)
565559 convertToParamAS (U, ArgInParamAS, HasCvtaParam, IsGridConstant);
566560 LLVM_DEBUG (dbgs () << " No need to copy or cast " << *Arg << " \n " );
@@ -578,30 +572,31 @@ static void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg) {
578572 // However, we're still not allowed to write to it. If the user specified
579573 // `__grid_constant__` for the argument, we'll consider escaped pointer as
580574 // read-only.
581- if (HasCvtaParam && ( ArgUseIsReadOnly || IsGridConstant )) {
575+ if (IsGridConstant || ( HasCvtaParam && ArgUseIsReadOnly)) {
582576 LLVM_DEBUG (dbgs () << " Using non-copy pointer to " << *Arg << " \n " );
583577 // Replace all argument pointer uses (which might include a device function
584578 // call) with a cast to the generic address space using cvta.param
585579 // instruction, which avoids a local copy.
586580 IRBuilder<> IRB (&Func->getEntryBlock ().front ());
587581
588- // Cast argument to param address space
589- auto *CastToParam = cast<AddrSpaceCastInst>(IRB.CreateAddrSpaceCast (
590- Arg, IRB.getPtrTy (ADDRESS_SPACE_PARAM), Arg->getName () + " .param" ));
582+ // Cast argument to param address space. Because the backend will emit the
583+ // argument already in the param address space, we need to use the noop
584+ // intrinsic; this has the added benefit of preventing other optimizations
585+ // from folding away this pair of addrspacecasts.
586+ auto *ParamSpaceArg =
587+ IRB.CreateIntrinsic (Intrinsic::nvvm_internal_noop_addrspacecast,
588+ {IRB.getPtrTy (ADDRESS_SPACE_PARAM), Arg->getType ()},
589+ Arg, {}, Arg->getName () + " .param" );
591590
592- // Cast param address to generic address space. We do not use an
593- // addrspacecast to generic here, because, LLVM considers `Arg` to be in the
594- // generic address space, and a `generic -> param` cast followed by a `param
595- // -> generic` cast will be folded away. The `param -> generic` intrinsic
596- // will be correctly lowered to `cvta.param`.
597- Value *CvtToGenCall = IRB.CreateIntrinsic (
598- IRB.getPtrTy (ADDRESS_SPACE_GENERIC), Intrinsic::nvvm_ptr_param_to_gen,
599- CastToParam, nullptr , CastToParam->getName () + " .gen" );
591+ // Cast param address to generic address space.
592+ Value *GenericArg = IRB.CreateAddrSpaceCast (
593+ ParamSpaceArg, IRB.getPtrTy (ADDRESS_SPACE_GENERIC),
594+ Arg->getName () + " .gen" );
600595
601- Arg->replaceAllUsesWith (CvtToGenCall );
596+ Arg->replaceAllUsesWith (GenericArg );
602597
603598 // Do not replace Arg in the cast to param space
604- CastToParam ->setOperand (0 , Arg);
599+ ParamSpaceArg ->setOperand (0 , Arg);
605600 } else
606601 copyByValParam (*Func, *Arg);
607602}
@@ -715,12 +710,14 @@ static bool copyFunctionByValArgs(Function &F) {
715710 LLVM_DEBUG (dbgs () << " Creating a copy of byval args of " << F.getName ()
716711 << " \n " );
717712 bool Changed = false ;
718- for (Argument &Arg : F.args ())
719- if (Arg.getType ()->isPointerTy () && Arg.hasByValAttr () &&
720- !(isParamGridConstant (Arg) && isKernelFunction (F))) {
721- copyByValParam (F, Arg);
722- Changed = true ;
723- }
713+ if (isKernelFunction (F)) {
714+ for (Argument &Arg : F.args ())
715+ if (Arg.getType ()->isPointerTy () && Arg.hasByValAttr () &&
716+ !isParamGridConstant (Arg)) {
717+ copyByValParam (F, Arg);
718+ Changed = true ;
719+ }
720+ }
724721 return Changed;
725722}
726723
0 commit comments