@@ -313,7 +313,8 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
313313 llvm::AtomicOrdering Ordering,
314314 llvm::LLVMContext &Ctx) const override ;
315315 void setTargetAtomicMetadata (CodeGenFunction &CGF,
316- llvm::AtomicRMWInst &RMW) const override ;
316+ llvm::Instruction &AtomicInst,
317+ const AtomicExpr *Expr = nullptr ) const override ;
317318 llvm::Value *createEnqueuedBlockKernel (CodeGenFunction &CGF,
318319 llvm::Function *BlockInvokeFunc,
319320 llvm::Type *BlockTy) const override ;
@@ -550,29 +551,39 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
550551}
551552
552553void AMDGPUTargetCodeGenInfo::setTargetAtomicMetadata (
553- CodeGenFunction &CGF, llvm::AtomicRMWInst &RMW) const {
554+ CodeGenFunction &CGF, llvm::Instruction &AtomicInst,
555+ const AtomicExpr *AE) const {
556+ auto *RMW = dyn_cast<llvm::AtomicRMWInst>(&AtomicInst);
557+ auto *CmpX = dyn_cast<llvm::AtomicCmpXchgInst>(&AtomicInst);
554558
555- if (RMW.getPointerAddressSpace () == llvm::AMDGPUAS::FLAT_ADDRESS &&
556- CGF.CGM .getLangOpts ().threadPrivateMemoryAtomicsAreUndefined ()) {
559+ // OpenCL and old style HIP atomics consider atomics targeting thread private
560+ // memory to be undefined.
561+ //
562+ // TODO: This is probably undefined for atomic load/store, but there's not
563+ // much direct codegen benefit to knowing this.
564+ if (((RMW && RMW->getPointerAddressSpace () == llvm::AMDGPUAS::FLAT_ADDRESS) ||
565+ (CmpX &&
566+ CmpX->getPointerAddressSpace () == llvm::AMDGPUAS::FLAT_ADDRESS)) &&
567+ AE && AE->threadPrivateMemoryAtomicsAreUndefined ()) {
557568 llvm::MDBuilder MDHelper (CGF.getLLVMContext ());
558569 llvm::MDNode *ASRange = MDHelper.createRange (
559570 llvm::APInt (32 , llvm::AMDGPUAS::PRIVATE_ADDRESS),
560571 llvm::APInt (32 , llvm::AMDGPUAS::PRIVATE_ADDRESS + 1 ));
561- RMW .setMetadata (llvm::LLVMContext::MD_noalias_addrspace, ASRange);
572+ AtomicInst .setMetadata (llvm::LLVMContext::MD_noalias_addrspace, ASRange);
562573 }
563574
564- if (!CGF.getTarget ().allowAMDGPUUnsafeFPAtomics ())
575+ if (!RMW || ! CGF.getTarget ().allowAMDGPUUnsafeFPAtomics ())
565576 return ;
566577
567578 // TODO: Introduce new, more controlled options that also work for integers,
568579 // and deprecate allowAMDGPUUnsafeFPAtomics.
569- llvm::AtomicRMWInst::BinOp RMWOp = RMW. getOperation ();
580+ llvm::AtomicRMWInst::BinOp RMWOp = RMW-> getOperation ();
570581 if (llvm::AtomicRMWInst::isFPOperation (RMWOp)) {
571582 llvm::MDNode *Empty = llvm::MDNode::get (CGF.getLLVMContext (), {});
572- RMW. setMetadata (" amdgpu.no.fine.grained.memory" , Empty);
583+ RMW-> setMetadata (" amdgpu.no.fine.grained.memory" , Empty);
573584
574- if (RMWOp == llvm::AtomicRMWInst::FAdd && RMW. getType ()->isFloatTy ())
575- RMW. setMetadata (" amdgpu.ignore.denormal.mode" , Empty);
585+ if (RMWOp == llvm::AtomicRMWInst::FAdd && RMW-> getType ()->isFloatTy ())
586+ RMW-> setMetadata (" amdgpu.ignore.denormal.mode" , Empty);
576587 }
577588}
578589
0 commit comments