@@ -6720,9 +6720,17 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
67206720 // We are only processing the operands of d16 image operations on subtargets
67216721 // that use the unpacked register layout, or need to repack the TFE result.
67226722
6723+ unsigned IntrOpcode = Intr->BaseOpcode ;
6724+ // For image atomic: use no-return opcode if result is unused.
6725+ if (Intr->AtomicNoRetBaseOpcode != Intr->BaseOpcode ) {
6726+ const MachineRegisterInfo &MRI = MF.getRegInfo ();
6727+ Register ResultDef = MI.getOperand (0 ).getReg ();
6728+ if (MRI.use_nodbg_empty (ResultDef))
6729+ IntrOpcode = Intr->AtomicNoRetBaseOpcode ;
6730+ }
67236731 // TODO: Do we need to guard against already legalized intrinsics?
67246732 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
6725- AMDGPU::getMIMGBaseOpcodeInfo (Intr-> BaseOpcode );
6733+ AMDGPU::getMIMGBaseOpcodeInfo (IntrOpcode );
67266734
67276735 MachineRegisterInfo *MRI = B.getMRI ();
67286736 const LLT S32 = LLT::scalar (32 );
@@ -6740,7 +6748,9 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
67406748
67416749 const bool IsAtomicPacked16Bit =
67426750 (BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 ||
6743- BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16);
6751+ BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16_NORTN ||
6752+ BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16 ||
6753+ BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16_NORTN);
67446754
67456755 // Check for 16 bit addresses and pack if true.
67466756 LLT GradTy =
0 commit comments