@@ -2890,51 +2890,47 @@ CGOpenMPRuntimeGPU::emitFastFPAtomicCall(CodeGenFunction &CGF, LValue X,
28902890 RValue Update, BinaryOperatorKind BO,
28912891 bool IsXBinopExpr) {
28922892 CGBuilderTy &Bld = CGF.Builder ;
2893- unsigned int IID = -1 ;
2894- RValue UpdateFixed = Update;
2893+ llvm::AtomicRMWInst::BinOp Kind = llvm::AtomicRMWInst::FAdd;
28952894 switch (BO) {
28962895 case BO_Sub:
2897- UpdateFixed = RValue::get (Bld.CreateFNeg (Update.getScalarVal ()));
2898- IID = llvm::Intrinsic::amdgcn_flat_atomic_fadd;
2896+ Kind = llvm::AtomicRMWInst::FSub;
28992897 break ;
29002898 case BO_Add:
2901- IID = llvm::Intrinsic::amdgcn_flat_atomic_fadd ;
2899+ Kind = llvm::AtomicRMWInst::FAdd ;
29022900 break ;
29032901 case BO_LT:
2904- IID = IsXBinopExpr ? llvm::Intrinsic::amdgcn_flat_atomic_fmax
2905- : llvm::Intrinsic::amdgcn_flat_atomic_fmin;
2902+ Kind = IsXBinopExpr ? llvm::AtomicRMWInst::FMax : llvm::AtomicRMWInst::FMin;
29062903 break ;
29072904 case BO_GT:
2908- IID = IsXBinopExpr ? llvm::Intrinsic::amdgcn_flat_atomic_fmin
2909- : llvm::Intrinsic::amdgcn_flat_atomic_fmax;
2905+ Kind = IsXBinopExpr ? llvm::AtomicRMWInst::FMin : llvm::AtomicRMWInst::FMax;
29102906 break ;
29112907 default :
29122908 // remaining operations are not supported yet
29132909 return std::make_pair (false , RValue::get (nullptr ));
29142910 }
29152911
2916- SmallVector<llvm::Value *> FPAtomicArgs;
2917- FPAtomicArgs.reserve (2 );
2918- FPAtomicArgs.push_back (X.getPointer (CGF));
2919- FPAtomicArgs.push_back (UpdateFixed.getScalarVal ());
2912+ llvm::Value *UpdateVal = Update.getScalarVal ();
2913+
2914+ // The scope of the atomic, currently set to 'agent'. By default, if this
2915+ // scope is not specified the scope will be 'system' scope.
2916+ llvm::SyncScope::ID SSID =
2917+ CGM.getLLVMContext ().getOrInsertSyncScopeID (" agent" );
2918+ llvm::AtomicRMWInst *CallInst = Bld.CreateAtomicRMW (
2919+ Kind, X.getAddress (), UpdateVal, llvm::AtomicOrdering::Monotonic, SSID);
2920+
2921+ // The following settings are used to get the atomicrmw instruction to
2922+ // be closer in spirit to the previous use of the intrinsic.
2923+ // Setting of amdgpu.no.fine.grained.memory property
2924+ llvm::MDTuple *EmptyMD = llvm::MDNode::get (CGM.getLLVMContext (), {});
2925+ CallInst->setMetadata (" amdgpu.no.fine.grained.memory" , EmptyMD);
2926+
2927+ // Setting of amdgpu.ignore.denormal.mode
2928+ if (Kind == llvm::AtomicRMWInst::FAdd && UpdateVal->getType ()->isFloatTy ())
2929+ CallInst->setMetadata (" amdgpu.ignore.denormal.mode" , EmptyMD);
2930+
2931+ // Note: breaks fp_atomics test so volatile cannot be used
2932+ // CallInst->setVolatile(true);
29202933
2921- llvm::Value *CallInst = nullptr ;
2922- if (Update.getScalarVal ()->getType ()->isFloatTy () &&
2923- (getOffloadArch (CGF.CGM ) == OffloadArch::GFX90a)) {
2924- // Fast FP atomics are not available for single precision address located in
2925- // FLAT address space.
2926- // We need to check the address space at runtime to determine
2927- // which function we can call. This is done in the OpenMP runtime.
2928- CallInst =
2929- CGF.EmitRuntimeCall (OMPBuilder.getOrCreateRuntimeFunction (
2930- CGM.getModule (), OMPRTL___kmpc_unsafeAtomicAdd),
2931- FPAtomicArgs);
2932- } else {
2933- llvm::Function *AtomicF = CGM.getIntrinsic (
2934- IID, {FPAtomicArgs[1 ]->getType (), FPAtomicArgs[0 ]->getType (),
2935- FPAtomicArgs[1 ]->getType ()});
2936- CallInst = CGF.EmitNounwindRuntimeCall (AtomicF, FPAtomicArgs);
2937- }
29382934 return std::make_pair (true , RValue::get (CallInst));
29392935}
29402936
0 commit comments