@@ -945,6 +945,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
945945 ISD::ATOMIC_LOAD_UMIN,
946946 ISD::ATOMIC_LOAD_UMAX,
947947 ISD::ATOMIC_LOAD_FADD,
948+ ISD::ATOMIC_LOAD_FMIN,
949+ ISD::ATOMIC_LOAD_FMAX,
948950 ISD::ATOMIC_LOAD_UINC_WRAP,
949951 ISD::ATOMIC_LOAD_UDEC_WRAP,
950952 ISD::INTRINSIC_VOID,
@@ -8707,25 +8709,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
87078709 case Intrinsic::amdgcn_ds_fmin:
87088710 case Intrinsic::amdgcn_ds_fmax: {
87098711 MemSDNode *M = cast<MemSDNode>(Op);
8710- unsigned Opc;
8711- switch (IntrID) {
8712- case Intrinsic::amdgcn_ds_fmin:
8713- Opc = AMDGPUISD::ATOMIC_LOAD_FMIN;
8714- break;
8715- case Intrinsic::amdgcn_ds_fmax:
8716- Opc = AMDGPUISD::ATOMIC_LOAD_FMAX;
8717- break;
8718- default:
8719- llvm_unreachable("Unknown intrinsic!");
8720- }
8721- SDValue Ops[] = {
8722- M->getOperand(0), // Chain
8723- M->getOperand(2), // Ptr
8724- M->getOperand(3) // Value
8725- };
8726-
8727- return DAG.getMemIntrinsicNode(Opc, SDLoc(Op), M->getVTList(), Ops,
8728- M->getMemoryVT(), M->getMemOperand());
8712+ unsigned Opc = IntrID == Intrinsic::amdgcn_ds_fmin ? ISD::ATOMIC_LOAD_FMIN
8713+ : ISD::ATOMIC_LOAD_FMAX;
8714+ return DAG.getAtomic(Opc, SDLoc(Op), M->getMemoryVT(), M->getOperand(0),
8715+ M->getOperand(2), M->getOperand(3),
8716+ M->getMemOperand());
87298717 }
87308718 case Intrinsic::amdgcn_raw_buffer_load:
87318719 case Intrinsic::amdgcn_raw_ptr_buffer_load:
@@ -9130,22 +9118,21 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
91309118 case Intrinsic::amdgcn_global_atomic_fmin_num:
91319119 case Intrinsic::amdgcn_flat_atomic_fmin:
91329120 case Intrinsic::amdgcn_flat_atomic_fmin_num: {
9133- Opcode = AMDGPUISD ::ATOMIC_LOAD_FMIN;
9121+ Opcode = ISD ::ATOMIC_LOAD_FMIN;
91349122 break;
91359123 }
91369124 case Intrinsic::amdgcn_global_atomic_fmax:
91379125 case Intrinsic::amdgcn_global_atomic_fmax_num:
91389126 case Intrinsic::amdgcn_flat_atomic_fmax:
91399127 case Intrinsic::amdgcn_flat_atomic_fmax_num: {
9140- Opcode = AMDGPUISD ::ATOMIC_LOAD_FMAX;
9128+ Opcode = ISD ::ATOMIC_LOAD_FMAX;
91419129 break;
91429130 }
91439131 default:
91449132 llvm_unreachable("unhandled atomic opcode");
91459133 }
9146- return DAG.getMemIntrinsicNode(Opcode, SDLoc(Op),
9147- M->getVTList(), Ops, M->getMemoryVT(),
9148- M->getMemOperand());
9134+ return DAG.getAtomic(Opcode, SDLoc(Op), M->getMemoryVT(), M->getVTList(),
9135+ Ops, M->getMemOperand());
91499136 }
91509137 case Intrinsic::amdgcn_s_get_barrier_state: {
91519138 SDValue Chain = Op->getOperand(0);
@@ -15816,8 +15803,6 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode *N,
1581615803 case ISD::INTRINSIC_W_CHAIN:
1581715804 return AMDGPU::isIntrinsicSourceOfDivergence(N->getConstantOperandVal(1));
1581815805 case AMDGPUISD::ATOMIC_CMP_SWAP:
15819- case AMDGPUISD::ATOMIC_LOAD_FMIN:
15820- case AMDGPUISD::ATOMIC_LOAD_FMAX:
1582115806 case AMDGPUISD::BUFFER_ATOMIC_SWAP:
1582215807 case AMDGPUISD::BUFFER_ATOMIC_ADD:
1582315808 case AMDGPUISD::BUFFER_ATOMIC_SUB:
@@ -16077,17 +16062,21 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1607716062 return AtomicExpansionKind::CmpXChg;
1607816063 }
1607916064 case AtomicRMWInst::FMin:
16080- case AtomicRMWInst::FMax:
16065+ case AtomicRMWInst::FMax: {
16066+ Type *Ty = RMW->getType();
16067+
16068+ // LDS float and double fmin/fmax were always supported.
16069+ if (AS == AMDGPUAS::LOCAL_ADDRESS && (Ty->isFloatTy() || Ty->isDoubleTy()))
16070+ return AtomicExpansionKind::None;
16071+
16072+ return AtomicExpansionKind::CmpXChg;
16073+ }
1608116074 case AtomicRMWInst::Min:
1608216075 case AtomicRMWInst::Max:
1608316076 case AtomicRMWInst::UMin:
1608416077 case AtomicRMWInst::UMax: {
1608516078 if (AMDGPU::isFlatGlobalAddrSpace(AS) ||
1608616079 AS == AMDGPUAS::BUFFER_FAT_POINTER) {
16087- if (RMW->getType()->isFloatTy() &&
16088- unsafeFPAtomicsDisabled(RMW->getFunction()))
16089- return AtomicExpansionKind::CmpXChg;
16090-
1609116080 // Always expand system scope min/max atomics.
1609216081 if (HasSystemScope)
1609316082 return AtomicExpansionKind::CmpXChg;
0 commit comments