|
9 | 9 | #include "ABIInfoImpl.h" |
10 | 10 | #include "TargetInfo.h" |
11 | 11 | #include "clang/Basic/TargetOptions.h" |
| 12 | +#include "llvm/Support/AMDGPUAddrSpace.h" |
12 | 13 |
|
13 | 14 | using namespace clang; |
14 | 15 | using namespace clang::CodeGen; |
@@ -312,7 +313,8 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { |
312 | 313 | llvm::AtomicOrdering Ordering, |
313 | 314 | llvm::LLVMContext &Ctx) const override; |
314 | 315 | void setTargetAtomicMetadata(CodeGenFunction &CGF, |
315 | | - llvm::AtomicRMWInst &RMW) const override; |
| 316 | + llvm::Instruction &AtomicInst, |
| 317 | + const AtomicExpr *Expr = nullptr) const override; |
316 | 318 | llvm::Value *createEnqueuedBlockKernel(CodeGenFunction &CGF, |
317 | 319 | llvm::Function *BlockInvokeFunc, |
318 | 320 | llvm::Type *BlockTy) const override; |
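The widened hook now receives any atomic instruction rather than only an `atomicrmw`, plus the source-level `AtomicExpr` when one is available. A minimal sketch of what a call site might look like, assuming clang CodeGen's existing `getTargetHooks()` accessor; the names `Op`, `Addr`, `Val`, `Order`, `SSID`, and `E` are placeholders, and the actual call sites are not part of this hunk:

```cpp
// Hedged sketch: after emitting an atomic instruction, hand it to the
// target hook together with the originating AtomicExpr so the target
// can attach language-dependent metadata (e.g. !noalias.addrspace).
llvm::AtomicRMWInst *RMW =
    CGF.Builder.CreateAtomicRMW(Op, Addr, Val, Order, SSID);
CGF.getTargetHooks().setTargetAtomicMetadata(CGF, *RMW, E);
```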
@@ -546,19 +548,39 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts, |
546 | 548 | } |
547 | 549 |
|
548 | 550 | void AMDGPUTargetCodeGenInfo::setTargetAtomicMetadata( |
549 | | - CodeGenFunction &CGF, llvm::AtomicRMWInst &RMW) const { |
550 | | - if (!CGF.getTarget().allowAMDGPUUnsafeFPAtomics()) |
| 551 | + CodeGenFunction &CGF, llvm::Instruction &AtomicInst, |
| 552 | + const AtomicExpr *AE) const { |
| 553 | + auto *RMW = dyn_cast<llvm::AtomicRMWInst>(&AtomicInst); |
| 554 | + auto *CmpX = dyn_cast<llvm::AtomicCmpXchgInst>(&AtomicInst); |
| 555 | + |
 | 556 | + // OpenCL and old-style HIP atomics consider atomics targeting thread-private |
 | 557 | + // memory to be undefined. |
| 558 | + // |
| 559 | + // TODO: This is probably undefined for atomic load/store, but there's not |
| 560 | + // much direct codegen benefit to knowing this. |
| 561 | + if (((RMW && RMW->getPointerAddressSpace() == llvm::AMDGPUAS::FLAT_ADDRESS) || |
| 562 | + (CmpX && |
| 563 | + CmpX->getPointerAddressSpace() == llvm::AMDGPUAS::FLAT_ADDRESS)) && |
| 564 | + AE && AE->threadPrivateMemoryAtomicsAreUndefined()) { |
| 565 | + llvm::MDBuilder MDHelper(CGF.getLLVMContext()); |
| 566 | + llvm::MDNode *ASRange = MDHelper.createRange( |
| 567 | + llvm::APInt(32, llvm::AMDGPUAS::PRIVATE_ADDRESS), |
| 568 | + llvm::APInt(32, llvm::AMDGPUAS::PRIVATE_ADDRESS + 1)); |
| 569 | + AtomicInst.setMetadata(llvm::LLVMContext::MD_noalias_addrspace, ASRange); |
| 570 | + } |
| 571 | + |
| 572 | + if (!RMW || !CGF.getTarget().allowAMDGPUUnsafeFPAtomics()) |
551 | 573 | return; |
552 | 574 |
|
553 | 575 | // TODO: Introduce new, more controlled options that also work for integers, |
554 | 576 | // and deprecate allowAMDGPUUnsafeFPAtomics. |
555 | | - llvm::AtomicRMWInst::BinOp RMWOp = RMW.getOperation(); |
| 577 | + llvm::AtomicRMWInst::BinOp RMWOp = RMW->getOperation(); |
556 | 578 | if (llvm::AtomicRMWInst::isFPOperation(RMWOp)) { |
557 | 579 | llvm::MDNode *Empty = llvm::MDNode::get(CGF.getLLVMContext(), {}); |
558 | | - RMW.setMetadata("amdgpu.no.fine.grained.memory", Empty); |
| 580 | + RMW->setMetadata("amdgpu.no.fine.grained.memory", Empty); |
559 | 581 |
|
560 | | - if (RMWOp == llvm::AtomicRMWInst::FAdd && RMW.getType()->isFloatTy()) |
561 | | - RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty); |
| 582 | + if (RMWOp == llvm::AtomicRMWInst::FAdd && RMW->getType()->isFloatTy()) |
| 583 | + RMW->setMetadata("amdgpu.ignore.denormal.mode", Empty); |
562 | 584 | } |
563 | 585 | } |
564 | 586 |
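The range metadata built above encodes a half-open interval of address spaces that the atomic's pointer is asserted not to alias. A self-contained sketch of the same construction (the helper name is illustrative), with the textual IR it corresponds to shown in comments:

```cpp
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

// Builds the !noalias.addrspace range attached above. On a flat-pointer
// atomic it renders in textual IR roughly as:
//   atomicrmw ... , !noalias.addrspace !0
//   !0 = !{i32 5, i32 6}   ; pointer does not alias addrspaces [5, 6)
// i.e. the AMDGPU private address space (5) is excluded, matching the
// OpenCL/old-style HIP rule that thread-private atomics are undefined.
static llvm::MDNode *makePrivateExclusionRange(llvm::LLVMContext &Ctx) {
  llvm::MDBuilder MDHelper(Ctx);
  return MDHelper.createRange(
      llvm::APInt(32, llvm::AMDGPUAS::PRIVATE_ADDRESS),      // lo = 5
      llvm::APInt(32, llvm::AMDGPUAS::PRIVATE_ADDRESS + 1)); // hi = 6
}
```

The unsafe-FP-atomics path below it keys on empty named metadata nodes (`amdgpu.no.fine.grained.memory`, `amdgpu.ignore.denormal.mode`) rather than a range, which later AMDGPU lowering can use to select faster atomic expansions.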
|
|