Skip to content

Commit 876b9ab

Browse files
committed
[AMDGPU] Use no-return opcodes for image atomics whose result is unused
1 parent 30b9d5c commit 876b9ab

File tree

1 file changed

+12
-2
lines changed

1 file changed

+12
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6720,9 +6720,17 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
67206720
// We are only processing the operands of d16 image operations on subtargets
67216721
// that use the unpacked register layout, or need to repack the TFE result.
67226722

6723+
unsigned IntrOpcode = Intr->BaseOpcode;
6724+
// For image atomics, switch to the no-return base opcode when the result has no non-debug uses.
6725+
if (Intr->AtomicNoRetBaseOpcode != Intr->BaseOpcode) {
6726+
const MachineRegisterInfo &MRI = MF.getRegInfo();
6727+
Register ResultDef = MI.getOperand(0).getReg();
6728+
if (MRI.use_nodbg_empty(ResultDef))
6729+
IntrOpcode = Intr->AtomicNoRetBaseOpcode;
6730+
}
67236731
// TODO: Do we need to guard against already legalized intrinsics?
67246732
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
6725-
AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
6733+
AMDGPU::getMIMGBaseOpcodeInfo(IntrOpcode);
67266734

67276735
MachineRegisterInfo *MRI = B.getMRI();
67286736
const LLT S32 = LLT::scalar(32);
@@ -6740,7 +6748,9 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
67406748

67416749
const bool IsAtomicPacked16Bit =
67426750
(BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 ||
6743-
BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16);
6751+
BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16_NORTN ||
6752+
BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16 ||
6753+
BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16_NORTN);
67446754

67456755
// Check for 16 bit addresses and pack if true.
67466756
LLT GradTy =

0 commit comments

Comments
 (0)