diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 37563203f2f83..cef87e077cc5c 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1812,6 +1812,13 @@ The AMDGPU backend supports the following LLVM IR attributes. offset by one less than the number of dynamic VGPR blocks required by the function encoded in bits 5..3. + "amdgpu-cluster-dims"="x,y,z" Specify the cluster workgroup dimensions. A value of "0,0,0" indicates that + cluster is disabled. A value of "1024,1024,1024" indicates that cluster is enabled, + but the dimensions cannot be determined at compile time. Any other value explicitly + specifies the cluster dimensions. + + This is only relevant on targets with cluster support. + ================================================ ========================================================== Calling Conventions diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp index d158f0f58d711..dda8033f47398 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.cpp @@ -107,6 +107,14 @@ AMDGPUFunctionArgInfo::getPreloadedValue( case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z: return std::tuple(WorkGroupIDZ ? &WorkGroupIDZ : nullptr, &AMDGPU::SGPR_32RegClass, LLT::scalar(32)); + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X: + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y: + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z: + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X: + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y: + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z: + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID: + return std::tuple(nullptr, &AMDGPU::SGPR_32RegClass, LLT::scalar(32)); case AMDGPUFunctionArgInfo::LDS_KERNEL_ID: return std::tuple(LDSKernelId ? 
&LDSKernelId : nullptr, &AMDGPU::SGPR_32RegClass, LLT::scalar(32)); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h index e07d47381ecca..1064e57b9da9e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h @@ -111,18 +111,25 @@ struct AMDGPUFunctionArgInfo { DISPATCH_ID = 4, FLAT_SCRATCH_INIT = 5, LDS_KERNEL_ID = 6, // LLVM internal, not part of the ABI - WORKGROUP_ID_X = 10, - WORKGROUP_ID_Y = 11, - WORKGROUP_ID_Z = 12, + WORKGROUP_ID_X = 10, // Also used for cluster ID X. + WORKGROUP_ID_Y = 11, // Also used for cluster ID Y. + WORKGROUP_ID_Z = 12, // Also used for cluster ID Z. PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14, IMPLICIT_BUFFER_PTR = 15, IMPLICIT_ARG_PTR = 16, PRIVATE_SEGMENT_SIZE = 17, + CLUSTER_WORKGROUP_ID_X = 21, + CLUSTER_WORKGROUP_ID_Y = 22, + CLUSTER_WORKGROUP_ID_Z = 23, + CLUSTER_WORKGROUP_MAX_ID_X = 24, + CLUSTER_WORKGROUP_MAX_ID_Y = 25, + CLUSTER_WORKGROUP_MAX_ID_Z = 26, + CLUSTER_WORKGROUP_MAX_FLAT_ID = 27, // VGPRS: - WORKITEM_ID_X = 18, - WORKITEM_ID_Y = 19, - WORKITEM_ID_Z = 20, + WORKITEM_ID_X = 28, + WORKITEM_ID_Y = 29, + WORKITEM_ID_Z = 30, FIRST_VGPR_VALUE = WORKITEM_ID_X }; // clang-format on diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index f18536cd4ab93..d8c4cbbc4fa33 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -4452,6 +4452,74 @@ void AMDGPULegalizerInfo::buildLoadInputValue(Register DstReg, } } +bool AMDGPULegalizerInfo::legalizeWorkGroupId( + MachineInstr &MI, MachineIRBuilder &B, + AMDGPUFunctionArgInfo::PreloadedValue WorkGroupIdPV, + AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV, + AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const { + Register DstReg = MI.getOperand(0).getReg(); + if (!ST.hasClusters()) { + if (!loadInputValue(DstReg, B, 
WorkGroupIdPV)) + return false; + MI.eraseFromParent(); + return true; + } + + // Clusters are supported. Return the global position in the grid. If clusters + // are enabled, WorkGroupIdPV returns the cluster ID not the workgroup ID. + + // WorkGroupIdXYZ = ClusterId == 0 ? + // ClusterIdXYZ : + // ClusterIdXYZ * (ClusterMaxIdXYZ + 1) + ClusterWorkGroupIdXYZ + MachineRegisterInfo &MRI = *B.getMRI(); + const LLT S32 = LLT::scalar(32); + Register ClusterIdXYZ = MRI.createGenericVirtualRegister(S32); + Register ClusterMaxIdXYZ = MRI.createGenericVirtualRegister(S32); + Register ClusterWorkGroupIdXYZ = MRI.createGenericVirtualRegister(S32); + if (!loadInputValue(ClusterIdXYZ, B, WorkGroupIdPV) || + !loadInputValue(ClusterWorkGroupIdXYZ, B, ClusterWorkGroupIdPV) || + !loadInputValue(ClusterMaxIdXYZ, B, ClusterMaxIdPV)) + return false; + + auto One = B.buildConstant(S32, 1); + auto ClusterSizeXYZ = B.buildAdd(S32, ClusterMaxIdXYZ, One); + auto GlobalIdXYZ = B.buildAdd(S32, ClusterWorkGroupIdXYZ, + B.buildMul(S32, ClusterIdXYZ, ClusterSizeXYZ)); + + const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>(); + + switch (MFI->getClusterDims().getKind()) { + case AMDGPU::ClusterDimsAttr::Kind::FixedDims: + case AMDGPU::ClusterDimsAttr::Kind::VariableDims: { + B.buildCopy(DstReg, GlobalIdXYZ); + MI.eraseFromParent(); + return true; + } + case AMDGPU::ClusterDimsAttr::Kind::NoCluster: { + B.buildCopy(DstReg, ClusterIdXYZ); + MI.eraseFromParent(); + return true; + } + case AMDGPU::ClusterDimsAttr::Kind::Unknown: { + using namespace AMDGPU::Hwreg; + unsigned ClusterIdField = HwregEncoding::encode(ID_IB_STS2, 6, 4); + Register ClusterId = MRI.createGenericVirtualRegister(S32); + MRI.setRegClass(ClusterId, &AMDGPU::SReg_32RegClass); + B.buildInstr(AMDGPU::S_GETREG_B32_const) + .addDef(ClusterId) + .addImm(ClusterIdField); + auto Zero = B.buildConstant(S32, 0); + auto NoClusters = + B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), ClusterId, Zero); + B.buildSelect(DstReg, NoClusters, 
ClusterIdXYZ, GlobalIdXYZ); + MI.eraseFromParent(); + return true; + } + } + + llvm_unreachable("nothing should reach here"); +} + bool AMDGPULegalizerInfo::loadInputValue( Register DstReg, MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const { @@ -4471,8 +4539,31 @@ bool AMDGPULegalizerInfo::loadInputValue( AMDGPU::isEntryFunctionCC(CC) && !MFI->hasWorkGroupIDZ() ? ~0u : 0xFFFFu); const ArgDescriptor WorkGroupIDZ = ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u); + const ArgDescriptor ClusterWorkGroupIDX = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0000000Fu); + const ArgDescriptor ClusterWorkGroupIDY = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x000000F0u); + const ArgDescriptor ClusterWorkGroupIDZ = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x00000F00u); + const ArgDescriptor ClusterWorkGroupMaxIDX = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0000F000u); + const ArgDescriptor ClusterWorkGroupMaxIDY = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x000F0000u); + const ArgDescriptor ClusterWorkGroupMaxIDZ = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x00F00000u); + const ArgDescriptor ClusterWorkGroupMaxFlatID = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0F000000u); + + auto LoadConstant = [&](unsigned N) { + B.buildConstant(DstReg, N); + return true; + }; + if (ST.hasArchitectedSGPRs() && (AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx)) { + AMDGPU::ClusterDimsAttr ClusterDims = MFI->getClusterDims(); + bool HasFixedDims = ClusterDims.isFixedDims(); + switch (ArgType) { case AMDGPUFunctionArgInfo::WORKGROUP_ID_X: Arg = &WorkGroupIDX; @@ -4489,6 +4580,53 @@ bool AMDGPULegalizerInfo::loadInputValue( ArgRC = &AMDGPU::SReg_32RegClass; ArgTy = LLT::scalar(32); break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X: + if (HasFixedDims && ClusterDims.getDims()[0] == 1) + return LoadConstant(0); + Arg = &ClusterWorkGroupIDX; + ArgRC = &AMDGPU::SReg_32RegClass; + ArgTy = LLT::scalar(32); + break; 
+ case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y: + if (HasFixedDims && ClusterDims.getDims()[1] == 1) + return LoadConstant(0); + Arg = &ClusterWorkGroupIDY; + ArgRC = &AMDGPU::SReg_32RegClass; + ArgTy = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z: + if (HasFixedDims && ClusterDims.getDims()[2] == 1) + return LoadConstant(0); + Arg = &ClusterWorkGroupIDZ; + ArgRC = &AMDGPU::SReg_32RegClass; + ArgTy = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X: + if (HasFixedDims) + return LoadConstant(ClusterDims.getDims()[0] - 1); + Arg = &ClusterWorkGroupMaxIDX; + ArgRC = &AMDGPU::SReg_32RegClass; + ArgTy = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y: + if (HasFixedDims) + return LoadConstant(ClusterDims.getDims()[1] - 1); + Arg = &ClusterWorkGroupMaxIDY; + ArgRC = &AMDGPU::SReg_32RegClass; + ArgTy = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z: + if (HasFixedDims) + return LoadConstant(ClusterDims.getDims()[2] - 1); + Arg = &ClusterWorkGroupMaxIDZ; + ArgRC = &AMDGPU::SReg_32RegClass; + ArgTy = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID: + Arg = &ClusterWorkGroupMaxFlatID; + ArgRC = &AMDGPU::SReg_32RegClass; + ArgTy = LLT::scalar(32); + break; default: break; } @@ -4499,10 +4637,9 @@ bool AMDGPULegalizerInfo::loadInputValue( if (!Arg) { if (ArgType == AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR) { - // The intrinsic may appear when we have a 0 sized kernarg segment, in which - // case the pointer argument may be missing and we use null. - B.buildConstant(DstReg, 0); - return true; + // The intrinsic may appear when we have a 0 sized kernarg segment, in + // which case the pointer argument may be missing and we use null. 
+ return LoadConstant(0); } // It's undefined behavior if a function marked with the amdgpu-no-* @@ -7415,6 +7552,22 @@ bool AMDGPULegalizerInfo::legalizeWaveID(MachineInstr &MI, return true; } +bool AMDGPULegalizerInfo::legalizeConstHwRegRead(MachineInstr &MI, + MachineIRBuilder &B, + AMDGPU::Hwreg::Id HwReg, + unsigned LowBit, + unsigned Width) const { + MachineRegisterInfo &MRI = *B.getMRI(); + Register DstReg = MI.getOperand(0).getReg(); + if (!MRI.getRegClassOrNull(DstReg)) + MRI.setRegClass(DstReg, &AMDGPU::SReg_32RegClass); + B.buildInstr(AMDGPU::S_GETREG_B32_const) + .addDef(DstReg) + .addImm(AMDGPU::Hwreg::HwregEncoding::encode(HwReg, LowBit, Width)); + MI.eraseFromParent(); + return true; +} + static constexpr unsigned FPEnvModeBitField = AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_MODE, 0, 23); @@ -7577,14 +7730,64 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, return legalizeWorkitemIDIntrinsic(MI, MRI, B, 2, AMDGPUFunctionArgInfo::WORKITEM_ID_Z); case Intrinsic::amdgcn_workgroup_id_x: - return legalizePreloadedArgIntrin(MI, MRI, B, - AMDGPUFunctionArgInfo::WORKGROUP_ID_X); + return legalizeWorkGroupId( + MI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_X, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X); case Intrinsic::amdgcn_workgroup_id_y: - return legalizePreloadedArgIntrin(MI, MRI, B, - AMDGPUFunctionArgInfo::WORKGROUP_ID_Y); + return legalizeWorkGroupId( + MI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_Y, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y); case Intrinsic::amdgcn_workgroup_id_z: - return legalizePreloadedArgIntrin(MI, MRI, B, + return legalizeWorkGroupId( + MI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z); + case Intrinsic::amdgcn_cluster_id_x: + return ST.hasClusters() && + legalizePreloadedArgIntrin(MI, 
MRI, B, + AMDGPUFunctionArgInfo::WORKGROUP_ID_X); + case Intrinsic::amdgcn_cluster_id_y: + return ST.hasClusters() && + legalizePreloadedArgIntrin(MI, MRI, B, + AMDGPUFunctionArgInfo::WORKGROUP_ID_Y); + case Intrinsic::amdgcn_cluster_id_z: + return ST.hasClusters() && + legalizePreloadedArgIntrin(MI, MRI, B, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); + case Intrinsic::amdgcn_cluster_workgroup_id_x: + return ST.hasClusters() && + legalizePreloadedArgIntrin( + MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X); + case Intrinsic::amdgcn_cluster_workgroup_id_y: + return ST.hasClusters() && + legalizePreloadedArgIntrin( + MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y); + case Intrinsic::amdgcn_cluster_workgroup_id_z: + return ST.hasClusters() && + legalizePreloadedArgIntrin( + MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z); + case Intrinsic::amdgcn_cluster_workgroup_flat_id: + return ST.hasClusters() && + legalizeConstHwRegRead(MI, B, AMDGPU::Hwreg::ID_IB_STS2, 21, 4); + case Intrinsic::amdgcn_cluster_workgroup_max_id_x: + return ST.hasClusters() && + legalizePreloadedArgIntrin( + MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X); + case Intrinsic::amdgcn_cluster_workgroup_max_id_y: + return ST.hasClusters() && + legalizePreloadedArgIntrin( + MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y); + case Intrinsic::amdgcn_cluster_workgroup_max_id_z: + return ST.hasClusters() && + legalizePreloadedArgIntrin( + MI, MRI, B, AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z); + case Intrinsic::amdgcn_cluster_workgroup_max_flat_id: + return ST.hasClusters() && + legalizePreloadedArgIntrin( + MI, MRI, B, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID); case Intrinsic::amdgcn_wave_id: return legalizeWaveID(MI, B); case Intrinsic::amdgcn_lds_kernel_id: diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index 1f4e02b0d600a..cd44a9ba0807c 100644 --- 
a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -114,6 +114,11 @@ class AMDGPULegalizerInfo final : public LegalizerInfo { void buildLoadInputValue(Register DstReg, MachineIRBuilder &B, const ArgDescriptor *Arg, const TargetRegisterClass *ArgRC, LLT ArgTy) const; + bool legalizeWorkGroupId( + MachineInstr &MI, MachineIRBuilder &B, + AMDGPUFunctionArgInfo::PreloadedValue ClusterIdPV, + AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV, + AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const; bool loadInputValue(Register DstReg, MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const; @@ -218,6 +223,9 @@ class AMDGPULegalizerInfo final : public LegalizerInfo { bool legalizeStackSave(MachineInstr &MI, MachineIRBuilder &B) const; bool legalizeWaveID(MachineInstr &MI, MachineIRBuilder &B) const; + bool legalizeConstHwRegRead(MachineInstr &MI, MachineIRBuilder &B, + AMDGPU::Hwreg::Id HwReg, unsigned LowBit, + unsigned Width) const; bool legalizeGetFPEnv(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 4927d2be67590..3332723b038f5 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2443,6 +2443,53 @@ SDValue SITargetLowering::lowerStackParameter(SelectionDAG &DAG, return DAG.getMergeValues({ConvertedVal, ArgValue.getValue(1)}, SL); } +SDValue SITargetLowering::lowerWorkGroupId( + SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT, + AMDGPUFunctionArgInfo::PreloadedValue WorkGroupIdPV, + AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV, + AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const { + if (!Subtarget->hasClusters()) + return getPreloadedValue(DAG, MFI, VT, WorkGroupIdPV); + + // Clusters are supported. Return the global position in the grid. 
If clusters + // are enabled, WorkGroupIdPV returns the cluster ID not the workgroup ID. + + // WorkGroupIdXYZ = ClusterId == 0 ? + // ClusterIdXYZ : + // ClusterIdXYZ * (ClusterMaxIdXYZ + 1) + ClusterWorkGroupIdXYZ + SDValue ClusterIdXYZ = getPreloadedValue(DAG, MFI, VT, WorkGroupIdPV); + SDLoc SL(ClusterIdXYZ); + SDValue ClusterMaxIdXYZ = getPreloadedValue(DAG, MFI, VT, ClusterMaxIdPV); + SDValue One = DAG.getConstant(1, SL, VT); + SDValue ClusterSizeXYZ = DAG.getNode(ISD::ADD, SL, VT, ClusterMaxIdXYZ, One); + SDValue ClusterWorkGroupIdXYZ = + getPreloadedValue(DAG, MFI, VT, ClusterWorkGroupIdPV); + SDValue GlobalIdXYZ = + DAG.getNode(ISD::ADD, SL, VT, ClusterWorkGroupIdXYZ, + DAG.getNode(ISD::MUL, SL, VT, ClusterIdXYZ, ClusterSizeXYZ)); + + switch (MFI.getClusterDims().getKind()) { + case AMDGPU::ClusterDimsAttr::Kind::FixedDims: + case AMDGPU::ClusterDimsAttr::Kind::VariableDims: + return GlobalIdXYZ; + case AMDGPU::ClusterDimsAttr::Kind::NoCluster: + return ClusterIdXYZ; + case AMDGPU::ClusterDimsAttr::Kind::Unknown: { + using namespace AMDGPU::Hwreg; + SDValue ClusterIdField = + DAG.getTargetConstant(HwregEncoding::encode(ID_IB_STS2, 6, 4), SL, VT); + SDNode *GetReg = + DAG.getMachineNode(AMDGPU::S_GETREG_B32_const, SL, VT, ClusterIdField); + SDValue ClusterId(GetReg, 0); + SDValue Zero = DAG.getConstant(0, SL, VT); + return DAG.getNode(ISD::SELECT_CC, SL, VT, ClusterId, Zero, ClusterIdXYZ, + GlobalIdXYZ, DAG.getCondCode(ISD::SETEQ)); + } + } + + llvm_unreachable("nothing should reach here"); +} + SDValue SITargetLowering::getPreloadedValue( SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT, AMDGPUFunctionArgInfo::PreloadedValue PVID) const { @@ -2461,9 +2508,31 @@ SDValue SITargetLowering::getPreloadedValue( AMDGPU::isEntryFunctionCC(CC) && !MFI.hasWorkGroupIDZ() ? 
 ~0u : 0xFFFFu); const ArgDescriptor WorkGroupIDZ = ArgDescriptor::createRegister(AMDGPU::TTMP7, 0xFFFF0000u); + const ArgDescriptor ClusterWorkGroupIDX = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0000000Fu); + const ArgDescriptor ClusterWorkGroupIDY = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x000000F0u); + const ArgDescriptor ClusterWorkGroupIDZ = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x00000F00u); + const ArgDescriptor ClusterWorkGroupMaxIDX = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0000F000u); + const ArgDescriptor ClusterWorkGroupMaxIDY = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x000F0000u); + const ArgDescriptor ClusterWorkGroupMaxIDZ = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x00F00000u); + const ArgDescriptor ClusterWorkGroupMaxFlatID = + ArgDescriptor::createRegister(AMDGPU::TTMP6, 0x0F000000u); + + auto LoadConstant = [&](unsigned N) { + return DAG.getConstant(N, SDLoc(), VT); + }; + if (Subtarget->hasArchitectedSGPRs() && (AMDGPU::isCompute(CC) || CC == CallingConv::AMDGPU_Gfx || CC == CallingConv::AMDGPU_Gfx_WholeWave)) { + AMDGPU::ClusterDimsAttr ClusterDims = MFI.getClusterDims(); + bool HasFixedDims = ClusterDims.isFixedDims(); + switch (PVID) { case AMDGPUFunctionArgInfo::WORKGROUP_ID_X: Reg = &WorkGroupIDX; @@ -2480,6 +2549,53 @@ SDValue SITargetLowering::getPreloadedValue( RC = &AMDGPU::SReg_32RegClass; Ty = LLT::scalar(32); break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X: + if (HasFixedDims && ClusterDims.getDims()[0] == 1) + return LoadConstant(0); + Reg = &ClusterWorkGroupIDX; + RC = &AMDGPU::SReg_32RegClass; + Ty = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y: + if (HasFixedDims && ClusterDims.getDims()[1] == 1) + return LoadConstant(0); + Reg = &ClusterWorkGroupIDY; + RC = &AMDGPU::SReg_32RegClass; + Ty = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z: + if 
 (HasFixedDims && ClusterDims.getDims()[2] == 1) + return LoadConstant(0); + Reg = &ClusterWorkGroupIDZ; + RC = &AMDGPU::SReg_32RegClass; + Ty = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X: + if (HasFixedDims) + return LoadConstant(ClusterDims.getDims()[0] - 1); + Reg = &ClusterWorkGroupMaxIDX; + RC = &AMDGPU::SReg_32RegClass; + Ty = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y: + if (HasFixedDims) + return LoadConstant(ClusterDims.getDims()[1] - 1); + Reg = &ClusterWorkGroupMaxIDY; + RC = &AMDGPU::SReg_32RegClass; + Ty = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z: + if (HasFixedDims) + return LoadConstant(ClusterDims.getDims()[2] - 1); + Reg = &ClusterWorkGroupMaxIDZ; + RC = &AMDGPU::SReg_32RegClass; + Ty = LLT::scalar(32); + break; + case AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID: + Reg = &ClusterWorkGroupMaxFlatID; + RC = &AMDGPU::SReg_32RegClass; + Ty = LLT::scalar(32); + break; default: break; } @@ -9539,6 +9655,19 @@ SDValue SITargetLowering::lowerWaveID(SelectionDAG &DAG, SDValue Op) const { DAG.getConstant(25, SL, VT), DAG.getConstant(5, SL, VT)); } +SDValue SITargetLowering::lowerConstHwRegRead(SelectionDAG &DAG, SDValue Op, + AMDGPU::Hwreg::Id HwReg, + unsigned LowBit, + unsigned Width) const { + SDLoc SL(Op); + using namespace AMDGPU::Hwreg; + return {DAG.getMachineNode( + AMDGPU::S_GETREG_B32_const, SL, MVT::i32, + DAG.getTargetConstant(HwregEncoding::encode(HwReg, LowBit, Width), + SL, MVT::i32)), + 0}; +} + SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim, const ArgDescriptor &Arg) const { @@ -9685,14 +9814,81 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return lowerImplicitZextParam(DAG, Op, MVT::i16, SI::KernelInputOffsets::LOCAL_SIZE_Z); case Intrinsic::amdgcn_workgroup_id_x: - return getPreloadedValue(DAG, *MFI, VT, - AMDGPUFunctionArgInfo::WORKGROUP_ID_X); + return 
lowerWorkGroupId(DAG, *MFI, VT, + AMDGPUFunctionArgInfo::WORKGROUP_ID_X, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X); case Intrinsic::amdgcn_workgroup_id_y: - return getPreloadedValue(DAG, *MFI, VT, - AMDGPUFunctionArgInfo::WORKGROUP_ID_Y); + return lowerWorkGroupId(DAG, *MFI, VT, + AMDGPUFunctionArgInfo::WORKGROUP_ID_Y, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y); case Intrinsic::amdgcn_workgroup_id_z: - return getPreloadedValue(DAG, *MFI, VT, - AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); + return lowerWorkGroupId(DAG, *MFI, VT, + AMDGPUFunctionArgInfo::WORKGROUP_ID_Z, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z); + case Intrinsic::amdgcn_cluster_id_x: + return Subtarget->hasClusters() + ? getPreloadedValue(DAG, *MFI, VT, + AMDGPUFunctionArgInfo::WORKGROUP_ID_X) + : DAG.getPOISON(VT); + case Intrinsic::amdgcn_cluster_id_y: + return Subtarget->hasClusters() + ? getPreloadedValue(DAG, *MFI, VT, + AMDGPUFunctionArgInfo::WORKGROUP_ID_Y) + : DAG.getPOISON(VT); + case Intrinsic::amdgcn_cluster_id_z: + return Subtarget->hasClusters() + ? getPreloadedValue(DAG, *MFI, VT, + AMDGPUFunctionArgInfo::WORKGROUP_ID_Z) + : DAG.getPOISON(VT); + case Intrinsic::amdgcn_cluster_workgroup_id_x: + return Subtarget->hasClusters() + ? getPreloadedValue( + DAG, *MFI, VT, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_X) + : DAG.getPOISON(VT); + case Intrinsic::amdgcn_cluster_workgroup_id_y: + return Subtarget->hasClusters() + ? getPreloadedValue( + DAG, *MFI, VT, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Y) + : DAG.getPOISON(VT); + case Intrinsic::amdgcn_cluster_workgroup_id_z: + return Subtarget->hasClusters() + ? getPreloadedValue( + DAG, *MFI, VT, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_ID_Z) + : DAG.getPOISON(VT); + case Intrinsic::amdgcn_cluster_workgroup_flat_id: + return Subtarget->hasClusters() + ? 
lowerConstHwRegRead(DAG, Op, AMDGPU::Hwreg::ID_IB_STS2, 21, 4) + : SDValue(); + case Intrinsic::amdgcn_cluster_workgroup_max_id_x: + return Subtarget->hasClusters() + ? getPreloadedValue( + DAG, *MFI, VT, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_X) + : DAG.getPOISON(VT); + case Intrinsic::amdgcn_cluster_workgroup_max_id_y: + return Subtarget->hasClusters() + ? getPreloadedValue( + DAG, *MFI, VT, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Y) + : DAG.getPOISON(VT); + case Intrinsic::amdgcn_cluster_workgroup_max_id_z: + return Subtarget->hasClusters() + ? getPreloadedValue( + DAG, *MFI, VT, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_ID_Z) + : DAG.getPOISON(VT); + case Intrinsic::amdgcn_cluster_workgroup_max_flat_id: + return Subtarget->hasClusters() + ? getPreloadedValue( + DAG, *MFI, VT, + AMDGPUFunctionArgInfo::CLUSTER_WORKGROUP_MAX_FLAT_ID) + : DAG.getPOISON(VT); case Intrinsic::amdgcn_wave_id: return lowerWaveID(DAG, Op); case Intrinsic::amdgcn_lds_kernel_id: { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 9c26cfa44a83e..ba408a8f64540 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -16,6 +16,7 @@ #include "AMDGPUArgumentUsageInfo.h" #include "AMDGPUISelLowering.h" +#include "SIDefines.h" #include "llvm/CodeGen/MachineFunction.h" namespace llvm { @@ -64,6 +65,11 @@ class SITargetLowering final : public AMDGPUTargetLowering { SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA, const SDLoc &SL, SDValue Chain, const ISD::InputArg &Arg) const; + SDValue lowerWorkGroupId( + SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT, + AMDGPUFunctionArgInfo::PreloadedValue ClusterIdPV, + AMDGPUFunctionArgInfo::PreloadedValue ClusterMaxIdPV, + AMDGPUFunctionArgInfo::PreloadedValue ClusterWorkGroupIdPV) const; SDValue getPreloadedValue(SelectionDAG &DAG, const SIMachineFunctionInfo &MFI, EVT VT, @@ -84,6 +90,9 @@ class 
SITargetLowering final : public AMDGPUTargetLowering { unsigned NewOpcode) const; SDValue lowerWaveID(SelectionDAG &DAG, SDValue Op) const; + SDValue lowerConstHwRegRead(SelectionDAG &DAG, SDValue Op, + AMDGPU::Hwreg::Id HwReg, unsigned LowBit, + unsigned Width) const; SDValue lowerWorkitemID(SelectionDAG &DAG, SDValue Op, unsigned Dim, const ArgDescriptor &ArgDesc) const; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 24a20cc9dcf82..dffb3d7459e64 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -928,7 +928,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { return Opcode == AMDGPU::S_CMPK_EQ_U32 || Opcode == AMDGPU::S_CMPK_LG_U32 || Opcode == AMDGPU::S_CMPK_GT_U32 || Opcode == AMDGPU::S_CMPK_GE_U32 || Opcode == AMDGPU::S_CMPK_LT_U32 || Opcode == AMDGPU::S_CMPK_LE_U32 || - Opcode == AMDGPU::S_GETREG_B32; + Opcode == AMDGPU::S_GETREG_B32 || + Opcode == AMDGPU::S_GETREG_B32_const; } /// \returns true if this is an s_store_dword* instruction. 
This is more diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 54426d33d3473..1f11be475e9f8 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -195,6 +195,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F, VGPRForAGPRCopy = AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1); } + + ClusterDims = AMDGPU::ClusterDimsAttr::get(F); } MachineFunctionInfo *SIMachineFunctionInfo::clone( diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index ca8f8033a2d54..45606153db58e 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -465,6 +465,9 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, // Default/requested number of work groups for the function. SmallVector<unsigned> MaxNumWorkGroups = {0, 0, 0}; + // Requested cluster dimensions. + AMDGPU::ClusterDimsAttr ClusterDims; + private: unsigned NumUserSGPRs = 0; unsigned NumSystemSGPRs = 0; @@ -1207,6 +1210,8 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, unsigned getMaxNumWorkGroupsX() const { return MaxNumWorkGroups[0]; } unsigned getMaxNumWorkGroupsY() const { return MaxNumWorkGroups[1]; } unsigned getMaxNumWorkGroupsZ() const { return MaxNumWorkGroups[2]; } + + AMDGPU::ClusterDimsAttr getClusterDims() const { return ClusterDims; } }; } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index fe94887cdff98..296ce5a46287c 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -1127,19 +1127,26 @@ def S_CBRANCH_I_FORK : SOPK_Pseudo < "$sdst, $simm16" >; -// This is hasSideEffects to allow its use in readcyclecounter selection. // FIXME: Need to truncate immediate to 16-bits. 
-// FIXME: Should have separate pseudos for known may read MODE and -// only read MODE. -def S_GETREG_B32 : SOPK_Pseudo < +class S_GETREG_B32_Pseudo<list<dag> pattern=[]> : SOPK_Pseudo < "s_getreg_b32", (outs SReg_32:$sdst), (ins hwreg:$simm16), - "$sdst, $simm16", - [(set i32:$sdst, (int_amdgcn_s_getreg (i32 timm:$simm16)))]> { + "$sdst, $simm16", pattern>; + +// This is hasSideEffects to allow its use in readcyclecounter selection. +// FIXME: Should have separate pseudos for known may read MODE and +// only read MODE. +def S_GETREG_B32 : S_GETREG_B32_Pseudo< + [(set i32:$sdst, (int_amdgcn_s_getreg (i32 timm:$simm16)))]> { let hasSideEffects = 1; let Uses = [MODE]; } +// A version of the pseudo for reading hardware register fields that are +// known to remain the same during the course of the run. Has no side +// effects and doesn't read MODE. +def S_GETREG_B32_const : S_GETREG_B32_Pseudo; + let Defs = [MODE], Uses = [MODE] in { // FIXME: Need to truncate immediate to 16-bits. diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 40da4f96aefdb..faae1fee342af 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -3533,6 +3533,54 @@ bool isPackedFP32Inst(unsigned Opc) { } } +const std::array<unsigned, 3> &ClusterDimsAttr::getDims() const { + assert(isFixedDims() && "expect kind to be FixedDims"); + return Dims; +} + +std::string ClusterDimsAttr::to_string() const { + SmallString<10> Buffer; + raw_svector_ostream OS(Buffer); + + switch (getKind()) { + case Kind::Unknown: + return ""; + case Kind::NoCluster: { + OS << EncoNoCluster << ',' << EncoNoCluster << ',' << EncoNoCluster; + return Buffer.c_str(); + } + case Kind::VariableDims: { + OS << EncoVariableDims << ',' << EncoVariableDims << ',' + << EncoVariableDims; + return Buffer.c_str(); + } + case Kind::FixedDims: { + OS << Dims[0] << ',' << Dims[1] << ',' << Dims[2]; + return Buffer.c_str(); + } + } + 
 llvm_unreachable("Unknown ClusterDimsAttr kind"); +} + +ClusterDimsAttr ClusterDimsAttr::get(const Function &F) { + std::optional<SmallVector<unsigned>> Attr = + getIntegerVecAttribute(F, "amdgpu-cluster-dims", /*Size=*/3); + ClusterDimsAttr::Kind AttrKind = Kind::FixedDims; + + if (!Attr.has_value()) + AttrKind = Kind::Unknown; + else if (all_of(*Attr, [](unsigned V) { return V == EncoNoCluster; })) + AttrKind = Kind::NoCluster; + else if (all_of(*Attr, [](unsigned V) { return V == EncoVariableDims; })) + AttrKind = Kind::VariableDims; + + ClusterDimsAttr A(AttrKind); + if (AttrKind == Kind::FixedDims) + A.Dims = {(*Attr)[0], (*Attr)[1], (*Attr)[2]}; + + return A; +} + } // namespace AMDGPU raw_ostream &operator<<(raw_ostream &OS, diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 3fcd16f9290b1..3f8d43db5a48c 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1813,6 +1813,54 @@ bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode); /// must be defined in terms of bytes. unsigned getLdsDwGranularity(const MCSubtargetInfo &ST); +/// Parsed form of the "amdgpu-cluster-dims" function attribute. +class ClusterDimsAttr { +public: + enum class Kind { Unknown, NoCluster, VariableDims, FixedDims }; + + ClusterDimsAttr() = default; + + Kind getKind() const { return AttrKind; } + + bool isUnknown() const { return getKind() == Kind::Unknown; } + + bool isNoCluster() const { return getKind() == Kind::NoCluster; } + + bool isFixedDims() const { return getKind() == Kind::FixedDims; } + + bool isVariableDims() const { return getKind() == Kind::VariableDims; } + + // Misspelled alias of isVariableDims(), kept for existing callers. + bool isVariableedDims() const { return isVariableDims(); } + + void setUnknown() { *this = ClusterDimsAttr(Kind::Unknown); } + + void setNoCluster() { *this = ClusterDimsAttr(Kind::NoCluster); } + + void setVariableDims() { *this = ClusterDimsAttr(Kind::VariableDims); } + + /// \returns the dims stored. Note that this function can only be called if + /// the kind is Kind::FixedDims. 
+ const std::array<unsigned, 3> &getDims() const; + + bool operator==(const ClusterDimsAttr &RHS) const { + return AttrKind == RHS.AttrKind && Dims == RHS.Dims; + } + + std::string to_string() const; + + static ClusterDimsAttr get(const Function &F); + +private: + enum Encoding { EncoNoCluster = 0, EncoVariableDims = 1024 }; + + explicit ClusterDimsAttr(Kind AttrKind) : AttrKind(AttrKind) {} + + std::array<unsigned, 3> Dims = {0, 0, 0}; + + Kind AttrKind = Kind::Unknown; +}; + } // end namespace AMDGPU raw_ostream &operator<<(raw_ostream &OS, diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll new file mode 100644 index 0000000000000..aa3b7b3606fd8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.id.ll @@ -0,0 +1,1258 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-UNKNOWN %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-MESA3D %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-G-UNKNOWN %s +; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-G-MESA3D %s + +declare i32 @llvm.amdgcn.cluster.workgroup.id.x() #0 +declare i32 @llvm.amdgcn.cluster.workgroup.id.y() #0 +declare i32 @llvm.amdgcn.cluster.workgroup.id.z() #0 + +define amdgpu_kernel void @test_workgroup_id_x(ptr addrspace(1) %out) #1 { +; CHECK-UNKNOWN-LABEL: test_workgroup_id_x: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: s_and_b32 s2, ttmp6, 15 +; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; 
CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_id_x: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: 
enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: s_and_b32 s2, ttmp6, 15 +; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_x: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], 
s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: s_and_b32 s2, ttmp6, 15 +; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_id_x: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; 
CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; 
CHECK-G-MESA3D-NEXT: s_and_b32 s2, ttmp6, 15 +; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.id.x() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_id_x_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1,2,2" { +; CHECK-UNKNOWN-LABEL: test_workgroup_id_x_optimized: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_id_x_optimized: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: 
enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 1 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; 
CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_x_optimized: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_id_x_optimized: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 
8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 1 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: 
debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.id.x() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_id_y(ptr addrspace(1) %out) #1 { +; CHECK-UNKNOWN-LABEL: test_workgroup_id_y: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40004 +; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_id_y: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; 
CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; 
CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40004 +; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_y: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40004 +; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_id_y: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: 
amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: 
is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40004 +; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.id.y() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_id_y_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" { +; CHECK-UNKNOWN-LABEL: test_workgroup_id_y_optimized: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: 
v_mov_b32_e32 v0, 0 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_id_y_optimized: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: 
enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 1 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_y_optimized: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], 
s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_id_y_optimized: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: 
enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 1 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 
v0, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.id.y() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_id_z(ptr addrspace(1) %out) #1 { +; CHECK-UNKNOWN-LABEL: test_workgroup_id_z: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40008 +; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_id_z: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: 
enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: 
call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40008 +; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_z: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40008 +; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_id_z: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: 
enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; 
CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40008 +; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.id.z() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_flat_id(ptr addrspace(1) %out) { +; CHECK-UNKNOWN-LABEL: test_workgroup_flat_id: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 21, 4) +; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_flat_id: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: 
kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: 
is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 21, 4) +; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_flat_id: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 21, 4) +; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_flat_id: +; CHECK-G-MESA3D: 
.amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; 
CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 21, 4) +; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.flat.id() + store i32 
%id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_id_z_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,2,1" { +; CHECK-UNKNOWN-LABEL: test_workgroup_id_z_optimized: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_id_z_optimized: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: 
enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 1 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], 
s[4:5], 0x0 +; CHECK-MESA3D-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_id_z_optimized: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_id_z_optimized: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: 
enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 1 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; 
CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v0, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.id.z() + store i32 %id, ptr addrspace(1) %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll new file mode 100644 index 0000000000000..afe37e371fbc3 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.flat.id.ll @@ -0,0 +1,194 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-UNKNOWN %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-MESA3D %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-G-UNKNOWN %s +; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-G-MESA3D %s + +declare i32 @llvm.amdgcn.cluster.workgroup.max.flat.id() #0 + +define amdgpu_kernel void @test_workgroup_max_flat_id(ptr addrspace(1) %out) #1 { +; CHECK-UNKNOWN-LABEL: test_workgroup_max_flat_id: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40018 +; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; 
CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_max_flat_id: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: 
enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40018 +; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_flat_id: +; 
CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40018 +; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_max_flat_id: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception 
= 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; 
CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40018 +; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.max.flat.id() + store i32 %id, ptr addrspace(1) %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll new file mode 100644 index 0000000000000..7ea4fa5373e57 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cluster.workgroup.max.id.ll @@ -0,0 +1,1077 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-UNKNOWN %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-MESA3D %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 %s -o - | FileCheck --check-prefixes=CHECK-G-UNKNOWN %s +; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=gfx1250 %s -o - | FileCheck -check-prefixes=CHECK-G-MESA3D %s + +declare i32 @llvm.amdgcn.cluster.workgroup.max.id.x() #0 +declare i32 @llvm.amdgcn.cluster.workgroup.max.id.y() #0 +declare i32 @llvm.amdgcn.cluster.workgroup.max.id.z() #0 + +define amdgpu_kernel void @test_workgroup_max_id_x(ptr addrspace(1) %out) #1 { +; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_x: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c +; CHECK-UNKNOWN-NEXT: s_delay_alu 
instid0(SALU_CYCLE_1) +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_max_id_x: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: 
enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c +; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 
v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_x: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c +; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_x: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: 
enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; 
CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c +; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.x() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_max_id_x_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="5,6,7" { +; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_x_optimized: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_max_id_x_optimized: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: 
enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: 
reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 4 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_x_optimized: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v0, 4 :: v_dual_mov_b32 v1, 0 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_x_optimized: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: 
debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; 
CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v0, 4 :: v_dual_mov_b32 v1, 0 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.x() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_max_id_y(ptr addrspace(1) %out) #1 { +; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_y: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40010 +; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_max_id_y: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: 
amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; 
CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40010 +; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_y: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40010 +; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; 
CHECK-G-MESA3D-LABEL: test_workgroup_max_id_y: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; 
CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40010 +; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 
@llvm.amdgcn.cluster.workgroup.max.id.y() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_max_id_y_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="5,6,7" { +; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_y_optimized: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 5 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_max_id_y_optimized: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: 
enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: 
.end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 5 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_y_optimized: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v0, 5 :: v_dual_mov_b32 v1, 0 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_y_optimized: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: 
enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: 
group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v0, 5 :: v_dual_mov_b32 v1, 0 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.y() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_max_id_z(ptr addrspace(1) %out) #1 { +; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_z: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40014 +; CHECK-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_max_id_z: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; 
CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; 
CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40014 +; CHECK-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_z: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-G-UNKNOWN-NEXT: s_bfe_u32 s2, ttmp6, 0x40014 +; CHECK-G-UNKNOWN-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_z: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: 
kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; 
CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: s_bfe_u32 s2, ttmp6, 0x40014 +; CHECK-G-MESA3D-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.z() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @test_workgroup_max_id_z_optimized(ptr addrspace(1) %out) "amdgpu-cluster-dims"="5,6,7" { +; CHECK-UNKNOWN-LABEL: test_workgroup_max_id_z_optimized: +; CHECK-UNKNOWN: ; %bb.0: +; CHECK-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; CHECK-UNKNOWN-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 6 +; CHECK-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; 
CHECK-UNKNOWN-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-UNKNOWN-NEXT: s_endpgm +; +; CHECK-MESA3D-LABEL: test_workgroup_max_id_z_optimized: +; CHECK-MESA3D: .amd_kernel_code_t +; CHECK-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-MESA3D-NEXT: priority = 0 +; CHECK-MESA3D-NEXT: float_mode = 240 +; CHECK-MESA3D-NEXT: priv = 0 +; CHECK-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-MESA3D-NEXT: debug_mode = 0 +; CHECK-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-MESA3D-NEXT: enable_exception = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-MESA3D-NEXT: 
enable_sgpr_private_segment_size = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-MESA3D-NEXT: private_element_size = 1 +; CHECK-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-MESA3D-NEXT: wavefront_size = 5 +; CHECK-MESA3D-NEXT: call_convention = -1 +; CHECK-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-MESA3D-NEXT: ; %bb.0: +; CHECK-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-MESA3D-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 6 +; CHECK-MESA3D-NEXT: s_wait_kmcnt 0x0 +; CHECK-MESA3D-NEXT: global_store_b32 v0, v1, s[0:1] +; CHECK-MESA3D-NEXT: s_endpgm +; +; CHECK-G-UNKNOWN-LABEL: test_workgroup_max_id_z_optimized: +; CHECK-G-UNKNOWN: ; %bb.0: +; CHECK-G-UNKNOWN-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; 
CHECK-G-UNKNOWN-NEXT: v_dual_mov_b32 v0, 6 :: v_dual_mov_b32 v1, 0 +; CHECK-G-UNKNOWN-NEXT: s_wait_kmcnt 0x0 +; CHECK-G-UNKNOWN-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-UNKNOWN-NEXT: s_endpgm +; +; CHECK-G-MESA3D-LABEL: test_workgroup_max_id_z_optimized: +; CHECK-G-MESA3D: .amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: amd_code_version_major = 1 +; CHECK-G-MESA3D-NEXT: amd_code_version_minor = 2 +; CHECK-G-MESA3D-NEXT: amd_machine_kind = 1 +; CHECK-G-MESA3D-NEXT: amd_machine_version_major = 12 +; CHECK-G-MESA3D-NEXT: amd_machine_version_minor = 5 +; CHECK-G-MESA3D-NEXT: amd_machine_version_stepping = 0 +; CHECK-G-MESA3D-NEXT: kernel_code_entry_byte_offset = 256 +; CHECK-G-MESA3D-NEXT: kernel_code_prefetch_byte_size = 0 +; CHECK-G-MESA3D-NEXT: granulated_workitem_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: granulated_wavefront_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: priority = 0 +; CHECK-G-MESA3D-NEXT: float_mode = 240 +; CHECK-G-MESA3D-NEXT: priv = 0 +; CHECK-G-MESA3D-NEXT: enable_dx10_clamp = 0 +; CHECK-G-MESA3D-NEXT: debug_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_ieee_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_wgp_mode = 0 +; CHECK-G-MESA3D-NEXT: enable_mem_ordered = 1 +; CHECK-G-MESA3D-NEXT: enable_fwd_progress = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 +; CHECK-G-MESA3D-NEXT: user_sgpr_count = 8 +; CHECK-G-MESA3D-NEXT: enable_trap_handler = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_x = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_y = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_id_z = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_workgroup_info = 0 +; CHECK-G-MESA3D-NEXT: enable_vgpr_workitem_id = 2 +; CHECK-G-MESA3D-NEXT: enable_exception_msb = 0 +; CHECK-G-MESA3D-NEXT: granulated_lds_size = 0 +; CHECK-G-MESA3D-NEXT: enable_exception = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_buffer = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_queue_ptr = 1 +; 
CHECK-G-MESA3D-NEXT: enable_sgpr_kernarg_segment_ptr = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_dispatch_id = 1 +; CHECK-G-MESA3D-NEXT: enable_sgpr_flat_scratch_init = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_private_segment_size = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_x = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_y = 0 +; CHECK-G-MESA3D-NEXT: enable_sgpr_grid_workgroup_count_z = 0 +; CHECK-G-MESA3D-NEXT: enable_wavefront_size32 = 1 +; CHECK-G-MESA3D-NEXT: enable_ordered_append_gds = 0 +; CHECK-G-MESA3D-NEXT: private_element_size = 1 +; CHECK-G-MESA3D-NEXT: is_ptr64 = 1 +; CHECK-G-MESA3D-NEXT: is_dynamic_callstack = 0 +; CHECK-G-MESA3D-NEXT: is_debug_enabled = 0 +; CHECK-G-MESA3D-NEXT: is_xnack_enabled = 0 +; CHECK-G-MESA3D-NEXT: workitem_private_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: workgroup_group_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: gds_segment_byte_size = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_byte_size = 24 +; CHECK-G-MESA3D-NEXT: workgroup_fbarrier_count = 0 +; CHECK-G-MESA3D-NEXT: wavefront_sgpr_count = 6 +; CHECK-G-MESA3D-NEXT: workitem_vgpr_count = 2 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_vgpr_count = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_first = 0 +; CHECK-G-MESA3D-NEXT: reserved_sgpr_count = 0 +; CHECK-G-MESA3D-NEXT: debug_wavefront_private_segment_offset_sgpr = 0 +; CHECK-G-MESA3D-NEXT: debug_private_segment_buffer_sgpr = 0 +; CHECK-G-MESA3D-NEXT: kernarg_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: group_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: private_segment_alignment = 4 +; CHECK-G-MESA3D-NEXT: wavefront_size = 5 +; CHECK-G-MESA3D-NEXT: call_convention = -1 +; CHECK-G-MESA3D-NEXT: runtime_loader_kernel_symbol = 0 +; CHECK-G-MESA3D-NEXT: .end_amd_kernel_code_t +; CHECK-G-MESA3D-NEXT: ; %bb.0: +; CHECK-G-MESA3D-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; CHECK-G-MESA3D-NEXT: v_dual_mov_b32 v0, 6 :: v_dual_mov_b32 v1, 0 +; CHECK-G-MESA3D-NEXT: s_wait_kmcnt 
0x0 +; CHECK-G-MESA3D-NEXT: global_store_b32 v1, v0, s[0:1] +; CHECK-G-MESA3D-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.cluster.workgroup.max.id.z() + store i32 %id, ptr addrspace(1) %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"amdgpu_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll index 2554d99def57f..169a84ff1f86b 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll @@ -297,6 +297,6 @@ declare i32 @llvm.amdgcn.workgroup.id.y() declare i32 @llvm.amdgcn.workgroup.id.z() declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg) -attributes #0 = { nounwind "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" } +attributes #0 = { nounwind "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" } ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: ; GFX9ARCH: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll new file mode 100644 index 0000000000000..69439d49e588f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-opt.ll @@ -0,0 +1,390 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 %s -o - | FileCheck -check-prefix=GFX1250-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel %s -o - | FileCheck -check-prefix=GFX1250-GISEL %s + +define void @test_workgroup_id_x_non_kernel(ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c +; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-SDAG-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-SDAG-NEXT: s_mul_i32 s0, ttmp9, s0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, s0 +; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s2, 0 +; GFX1250-SDAG-NEXT: s_cselect_b32 s0, ttmp9, s1 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c +; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-GISEL-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0 +; 
GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, s0 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s0, ttmp9, s1 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.x() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_x_non_kernel_optimized_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1024,1024,1024" { +; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel_optimized_used: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c +; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: s_mul_i32 s0, ttmp9, s0 +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s1, s0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel_optimized_used: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c +; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0 +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s1, s0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, 
off +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.x() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_x_non_kernel_optimized_not_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="0,0,0" { +; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel_optimized_not_used: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, ttmp9 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel_optimized_not_used: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, ttmp9 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.x() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_x_non_kernel_optimized_fixed(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" { +; GFX1250-SDAG-LABEL: test_workgroup_id_x_non_kernel_optimized_fixed: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_lshl_b32 s0, ttmp9, 1 +; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s1, s0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_x_non_kernel_optimized_fixed: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_and_b32 s0, ttmp6, 15 +; GFX1250-GISEL-NEXT: s_delay_alu 
instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_lshl1_add_u32 s0, ttmp9, s0 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.x() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_y_non_kernel(ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40010 +; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x40004 +; GFX1250-SDAG-NEXT: s_mul_i32 s0, s1, s0 +; GFX1250-SDAG-NEXT: s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, s0 +; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s3, 0 +; GFX1250-SDAG-NEXT: s_cselect_b32 s0, s1, s2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40010 +; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40004 +; GFX1250-GISEL-NEXT: s_mul_i32 s0, s1, s0 +; GFX1250-GISEL-NEXT: s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-GISEL-NEXT: s_add_co_i32 s2, s2, s0 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s3, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s0, s1, s2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: 
global_store_b32 v[0:1], v2, off +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.y() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_y_non_kernel_optimized_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="1024,1024,1024" { +; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel_optimized_used: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40010 +; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: s_mul_i32 s1, s1, s0 +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40004 +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, s1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel_optimized_used: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40010 +; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40004 +; GFX1250-GISEL-NEXT: s_mul_i32 s1, s1, s0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s2, s1 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.y() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_y_non_kernel_optimized_not_used(ptr addrspace(1) %out) 
"amdgpu-cluster-dims"="0,0,0" { +; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel_optimized_not_used: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel_optimized_not_used: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.y() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_y_non_kernel_optimized_fixed(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" { +; GFX1250-SDAG-LABEL: test_workgroup_id_y_non_kernel_optimized_fixed: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_y_non_kernel_optimized_fixed: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-GISEL-NEXT: s_set_pc_i64 
s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.y() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_z_non_kernel(ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40014 +; GFX1250-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16 +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x40008 +; GFX1250-SDAG-NEXT: s_mul_i32 s0, s1, s0 +; GFX1250-SDAG-NEXT: s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, s0 +; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s3, 0 +; GFX1250-SDAG-NEXT: s_cselect_b32 s0, s1, s2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40014 +; GFX1250-GISEL-NEXT: s_lshr_b32 s1, ttmp7, 16 +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40008 +; GFX1250-GISEL-NEXT: s_mul_i32 s0, s1, s0 +; GFX1250-GISEL-NEXT: s_getreg_b32 s3, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-GISEL-NEXT: s_add_co_i32 s2, s2, s0 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s3, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s0, s1, s2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.z() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_z_non_kernel_optimized_used(ptr addrspace(1) %out) 
"amdgpu-cluster-dims"="1024,1024,1024" { +; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel_optimized_used: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40014 +; GFX1250-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16 +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: s_mul_i32 s1, s1, s0 +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40008 +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, s1 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel_optimized_used: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40014 +; GFX1250-GISEL-NEXT: s_lshr_b32 s1, ttmp7, 16 +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40008 +; GFX1250-GISEL-NEXT: s_mul_i32 s1, s1, s0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s2, s1 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.z() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_z_non_kernel_optimized_not_used(ptr addrspace(1) %out) "amdgpu-cluster-dims"="0,0,0" { +; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel_optimized_not_used: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16 +; GFX1250-SDAG-NEXT: 
s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel_optimized_not_used: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.z() + store i32 %id, ptr addrspace(1) %out + ret void +} + +define void @test_workgroup_id_z_non_kernel_optimized_fixed(ptr addrspace(1) %out) "amdgpu-cluster-dims"="2,1,2" { +; GFX1250-SDAG-LABEL: test_workgroup_id_z_non_kernel_optimized_fixed: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 15 +; GFX1250-SDAG-NEXT: s_bfe_u32 s1, ttmp6, 0x40008 +; GFX1250-SDAG-NEXT: s_and_b32 s0, s0, 0x1fffe +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s1, s0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v2, off +; GFX1250-SDAG-NEXT: s_set_pc_i64 s[30:31] +; +; GFX1250-GISEL-LABEL: test_workgroup_id_z_non_kernel_optimized_fixed: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16 +; GFX1250-GISEL-NEXT: s_bfe_u32 s1, ttmp6, 0x40008 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_lshl1_add_u32 s0, s0, s1 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v2, off +; 
GFX1250-GISEL-NEXT: s_set_pc_i64 s[30:31] + %id = call i32 @llvm.amdgcn.workgroup.id.z() + store i32 %id, ptr addrspace(1) %out + ret void +} + + +declare i32 @llvm.amdgcn.workgroup.id.x() +declare i32 @llvm.amdgcn.workgroup.id.y() +declare i32 @llvm.amdgcn.workgroup.id.z() diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll new file mode 100644 index 0000000000000..497241cff392d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll @@ -0,0 +1,376 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs < %s | FileCheck -check-prefix=GFX9-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel < %s | FileCheck -check-prefix=GFX9-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel < %s | FileCheck -check-prefix=GFX12-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1250 -global-isel < %s | FileCheck -check-prefix=GFX1250-GISEL %s + +define amdgpu_cs void @_amdgpu_cs_main() { +; GFX9-SDAG-LABEL: _amdgpu_cs_main: +; GFX9-SDAG: ; %bb.0: ; %.entry +; GFX9-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16 +; GFX9-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 +; GFX9-SDAG-NEXT: s_endpgm +; +; GFX9-GISEL-LABEL: _amdgpu_cs_main: +; GFX9-GISEL: ; %bb.0: ; %.entry +; GFX9-GISEL-NEXT: s_mov_b32 s0, ttmp9 +; GFX9-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX9-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; 
GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 +; GFX9-GISEL-NEXT: s_endpgm +; +; GFX12-SDAG-LABEL: _amdgpu_cs_main: +; GFX12-SDAG: ; %bb.0: ; %.entry +; GFX12-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX12-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16 +; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s1 +; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX12-SDAG-NEXT: s_endpgm +; +; GFX12-GISEL-LABEL: _amdgpu_cs_main: +; GFX12-GISEL: ; %bb.0: ; %.entry +; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9 +; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16 +; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX12-GISEL-NEXT: s_endpgm +; +; GFX1250-SDAG-LABEL: _amdgpu_cs_main: +; GFX1250-SDAG: ; %bb.0: ; %.entry +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c +; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x40010 +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-SDAG-NEXT: s_mul_i32 s0, ttmp9, s0 +; GFX1250-SDAG-NEXT: s_and_b32 s3, ttmp7, 0xffff +; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, 1 +; GFX1250-SDAG-NEXT: s_bfe_u32 s4, ttmp6, 0x40014 +; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, s0 +; GFX1250-SDAG-NEXT: s_mul_i32 s0, s3, s2 +; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x40004 +; GFX1250-SDAG-NEXT: s_lshr_b32 s5, ttmp7, 16 +; GFX1250-SDAG-NEXT: s_add_co_i32 s4, s4, 1 +; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, s0 +; GFX1250-SDAG-NEXT: s_mul_i32 s0, s5, s4 +; GFX1250-SDAG-NEXT: s_bfe_u32 s4, ttmp6, 0x40008 +; GFX1250-SDAG-NEXT: s_getreg_b32 s6, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-SDAG-NEXT: s_add_co_i32 s4, s4, s0 +; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s6, 0 +; GFX1250-SDAG-NEXT: s_cselect_b32 s0, s5, s4 +; 
GFX1250-SDAG-NEXT: s_cselect_b32 s1, ttmp9, s1 +; GFX1250-SDAG-NEXT: s_cselect_b32 s2, s3, s2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s2 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX1250-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: _amdgpu_cs_main: +; GFX1250-GISEL: ; %bb.0: ; %.entry +; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c +; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-GISEL-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, s0 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s0, ttmp9, s1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s1, ttmp6, 0x40010 +; GFX1250-GISEL-NEXT: s_and_b32 s3, ttmp7, 0xffff +; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, 1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s4, ttmp6, 0x40004 +; GFX1250-GISEL-NEXT: s_mul_i32 s1, s3, s1 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_add_co_i32 s4, s4, s1 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s1, s3, s4 +; GFX1250-GISEL-NEXT: s_bfe_u32 s3, ttmp6, 0x40014 +; GFX1250-GISEL-NEXT: s_lshr_b32 s4, ttmp7, 16 +; GFX1250-GISEL-NEXT: s_add_co_i32 s3, s3, 1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s5, ttmp6, 0x40008 +; GFX1250-GISEL-NEXT: s_mul_i32 s3, s4, s3 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_add_co_i32 s5, s5, s3 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s2, s4, s5 +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX1250-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX1250-GISEL-NEXT: s_endpgm +.entry: + %idx 
= call i32 @llvm.amdgcn.workgroup.id.x() + %idy = call i32 @llvm.amdgcn.workgroup.id.y() + %idz = call i32 @llvm.amdgcn.workgroup.id.z() + %ielemx = insertelement <3 x i32> undef, i32 %idx, i64 0 + %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1 + %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2 + call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0) + ret void +} + +define amdgpu_cs void @workgroup_id_no_clusters() "amdgpu-cluster-dims"="0,0,0" { +; GFX9-SDAG-LABEL: workgroup_id_no_clusters: +; GFX9-SDAG: ; %bb.0: ; %.entry +; GFX9-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16 +; GFX9-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 +; GFX9-SDAG-NEXT: s_endpgm +; +; GFX9-GISEL-LABEL: workgroup_id_no_clusters: +; GFX9-GISEL: ; %bb.0: ; %.entry +; GFX9-GISEL-NEXT: s_mov_b32 s0, ttmp9 +; GFX9-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX9-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 +; GFX9-GISEL-NEXT: s_endpgm +; +; GFX12-SDAG-LABEL: workgroup_id_no_clusters: +; GFX12-SDAG: ; %bb.0: ; %.entry +; GFX12-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX12-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16 +; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s1 +; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX12-SDAG-NEXT: s_endpgm +; +; GFX12-GISEL-LABEL: workgroup_id_no_clusters: +; GFX12-GISEL: ; %bb.0: ; %.entry +; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9 +; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16 +; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: 
v_dual_mov_b32 v1, s1 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX12-GISEL-NEXT: s_endpgm +; +; GFX1250-SDAG-LABEL: workgroup_id_no_clusters: +; GFX1250-SDAG: ; %bb.0: ; %.entry +; GFX1250-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX1250-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s1 +; GFX1250-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: workgroup_id_no_clusters: +; GFX1250-GISEL: ; %bb.0: ; %.entry +; GFX1250-GISEL-NEXT: s_mov_b32 s0, ttmp9 +; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX1250-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16 +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX1250-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX1250-GISEL-NEXT: s_endpgm +.entry: + %idx = call i32 @llvm.amdgcn.workgroup.id.x() + %idy = call i32 @llvm.amdgcn.workgroup.id.y() + %idz = call i32 @llvm.amdgcn.workgroup.id.z() + %ielemx = insertelement <3 x i32> undef, i32 %idx, i64 0 + %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1 + %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2 + call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0) + ret void +} + +define amdgpu_cs void @workgroup_id_optimized() "amdgpu-cluster-dims"="2,3,4" { +; GFX9-SDAG-LABEL: workgroup_id_optimized: +; GFX9-SDAG: ; %bb.0: ; %.entry +; GFX9-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16 +; GFX9-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 +; GFX9-SDAG-NEXT: s_endpgm +; +; GFX9-GISEL-LABEL: workgroup_id_optimized: +; GFX9-GISEL: ; %bb.0: 
; %.entry +; GFX9-GISEL-NEXT: s_mov_b32 s0, ttmp9 +; GFX9-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX9-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 +; GFX9-GISEL-NEXT: s_endpgm +; +; GFX12-SDAG-LABEL: workgroup_id_optimized: +; GFX12-SDAG: ; %bb.0: ; %.entry +; GFX12-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff +; GFX12-SDAG-NEXT: s_lshr_b32 s1, ttmp7, 16 +; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s0 +; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s1 +; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX12-SDAG-NEXT: s_endpgm +; +; GFX12-GISEL-LABEL: workgroup_id_optimized: +; GFX12-GISEL: ; %bb.0: ; %.entry +; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9 +; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16 +; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX12-GISEL-NEXT: s_endpgm +; +; GFX1250-SDAG-LABEL: workgroup_id_optimized: +; GFX1250-SDAG: ; %bb.0: ; %.entry +; GFX1250-SDAG-NEXT: s_lshl_b32 s0, ttmp9, 1 +; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-SDAG-NEXT: s_lshr_b32 s2, ttmp7, 14 +; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, s0 +; GFX1250-SDAG-NEXT: s_and_b32 s0, s2, 0x3fffc +; GFX1250-SDAG-NEXT: s_and_b32 s2, ttmp7, 0xffff +; GFX1250-SDAG-NEXT: s_bfe_u32 s3, ttmp6, 0x40008 +; GFX1250-SDAG-NEXT: s_mul_i32 s2, s2, 3 +; GFX1250-SDAG-NEXT: s_bfe_u32 s4, ttmp6, 0x40004 +; GFX1250-SDAG-NEXT: s_add_co_i32 s3, s3, s0 +; GFX1250-SDAG-NEXT: s_add_co_i32 s4, s4, s2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s4 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s3 +; GFX1250-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null 
+; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: workgroup_id_optimized: +; GFX1250-GISEL: ; %bb.0: ; %.entry +; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff +; GFX1250-GISEL-NEXT: s_and_b32 s0, ttmp6, 15 +; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x40004 +; GFX1250-GISEL-NEXT: s_mul_i32 s1, s1, 3 +; GFX1250-GISEL-NEXT: s_lshr_b32 s3, ttmp7, 16 +; GFX1250-GISEL-NEXT: s_bfe_u32 s4, ttmp6, 0x40008 +; GFX1250-GISEL-NEXT: s_lshl1_add_u32 s0, ttmp9, s0 +; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s2, s1 +; GFX1250-GISEL-NEXT: s_lshl2_add_u32 s2, s3, s4 +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX1250-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null +; GFX1250-GISEL-NEXT: s_endpgm +.entry: + %idx = call i32 @llvm.amdgcn.workgroup.id.x() + %idy = call i32 @llvm.amdgcn.workgroup.id.y() + %idz = call i32 @llvm.amdgcn.workgroup.id.z() + %ielemx = insertelement <3 x i32> undef, i32 %idx, i64 0 + %ielemy = insertelement <3 x i32> %ielemx, i32 %idy, i64 1 + %ielemz = insertelement <3 x i32> %ielemy, i32 %idz, i64 2 + call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %ielemz, ptr addrspace(8) undef, i32 0, i32 0, i32 0) + ret void +} + +define amdgpu_cs void @caller() { +; GFX9-SDAG-LABEL: caller: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_getpc_b64 s[8:9] +; GFX9-SDAG-NEXT: s_mov_b32 s8, s0 +; GFX9-SDAG-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10 +; GFX9-SDAG-NEXT: s_mov_b32 s5, callee@abs32@hi +; GFX9-SDAG-NEXT: s_mov_b32 s4, callee@abs32@lo +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX9-SDAG-NEXT: s_mov_b32 s32, 0 +; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-SDAG-NEXT: s_add_u32 s8, s8, s0 +; GFX9-SDAG-NEXT: s_addc_u32 s9, s9, 0 +; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[8:9] +; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] +; GFX9-SDAG-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX9-SDAG-NEXT: s_endpgm +; +; GFX9-GISEL-LABEL: caller: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: 
s_getpc_b64 s[8:9] +; GFX9-GISEL-NEXT: s_mov_b32 s8, s0 +; GFX9-GISEL-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10 +; GFX9-GISEL-NEXT: s_mov_b32 s4, callee@abs32@lo +; GFX9-GISEL-NEXT: s_mov_b32 s5, callee@abs32@hi +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX9-GISEL-NEXT: s_mov_b32 s32, 0 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-GISEL-NEXT: s_add_u32 s8, s8, s0 +; GFX9-GISEL-NEXT: s_addc_u32 s9, s9, 0 +; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[8:9] +; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] +; GFX9-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX9-GISEL-NEXT: s_endpgm +; +; GFX12-SDAG-LABEL: caller: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX12-SDAG-NEXT: s_mov_b32 s1, callee@abs32@hi +; GFX12-SDAG-NEXT: s_mov_b32 s0, callee@abs32@lo +; GFX12-SDAG-NEXT: s_mov_b32 s32, 0 +; GFX12-SDAG-NEXT: s_wait_alu 0xfffe +; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX12-SDAG-NEXT: s_endpgm +; +; GFX12-GISEL-LABEL: caller: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 +; GFX12-GISEL-NEXT: s_mov_b32 s0, callee@abs32@lo +; GFX12-GISEL-NEXT: s_mov_b32 s1, callee@abs32@hi +; GFX12-GISEL-NEXT: s_mov_b32 s32, 0 +; GFX12-GISEL-NEXT: s_wait_alu 0xfffe +; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX12-GISEL-NEXT: s_endpgm +; +; GFX1250-SDAG-LABEL: caller: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c +; GFX1250-SDAG-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-SDAG-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-SDAG-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-SDAG-NEXT: s_mul_i32 s0, ttmp9, s0 +; GFX1250-SDAG-NEXT: s_mov_b32 s32, 0 +; GFX1250-SDAG-NEXT: s_add_co_i32 s1, s1, s0 +; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s2, 0 +; GFX1250-SDAG-NEXT: s_cselect_b32 s2, ttmp9, s1 +; GFX1250-SDAG-NEXT: s_mov_b64 s[0:1], callee@abs64 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-SDAG-NEXT: s_swap_pc_i64 s[30:31], s[0:1] +; GFX1250-SDAG-NEXT: s_endpgm +; +; 
GFX1250-GISEL-LABEL: caller: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c +; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-GISEL-NEXT: s_getreg_b32 s2, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0 +; GFX1250-GISEL-NEXT: s_mov_b32 s32, 0 +; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, s0 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s2, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s2, ttmp9, s1 +; GFX1250-GISEL-NEXT: s_mov_b64 s[0:1], callee@abs64 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-GISEL-NEXT: s_swap_pc_i64 s[30:31], s[0:1] +; GFX1250-GISEL-NEXT: s_endpgm + %idx = call i32 @llvm.amdgcn.workgroup.id.x() + call amdgpu_gfx void @callee(i32 %idx) + ret void +} + +declare amdgpu_gfx void @callee(i32) + +declare i32 @llvm.amdgcn.workgroup.id.x() +declare i32 @llvm.amdgcn.workgroup.id.y() +declare i32 @llvm.amdgcn.workgroup.id.z() +declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll index 25609e881254e..b2bcb74e4184f 100644 --- a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll +++ b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll @@ -4089,32 +4089,44 @@ define amdgpu_kernel void @compute_mad(ptr addrspace(4) %i18, ptr addrspace(4) % ; GFX1250-NEXT: s_add_co_i32 s0, s10, 1 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) ; GFX1250-NEXT: v_mul_lo_u32 v1, s0, v0 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-NEXT: v_dual_add_nc_u32 v2, s0, v1 :: v_dual_add_nc_u32 v1, 1, v1 ; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_and_b32 s4, ttmp6, 15 +; GFX1250-NEXT: s_getreg_b32 s5, hwreg(HW_REG_IB_STS2, 6, 4) ; 
GFX1250-NEXT: v_mul_lo_u32 v2, v2, v0 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-NEXT: v_mul_lo_u32 v3, v2, v1 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x4 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_bfe_u32 s3, ttmp6, 0x4000c ; GFX1250-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX1250-NEXT: s_add_co_i32 s3, s3, 1 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: s_mul_i32 s3, ttmp9, s3 ; GFX1250-NEXT: v_add_nc_u32_e32 v1, v3, v1 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1250-NEXT: s_add_co_i32 s4, s4, s3 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) ; GFX1250-NEXT: v_mul_lo_u32 v1, v1, v2 ; GFX1250-NEXT: v_add_nc_u32_e32 v2, 1, v3 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_and_b32 s2, s2, 0xffff +; GFX1250-NEXT: s_cmp_eq_u32 s5, 0 ; GFX1250-NEXT: v_mul_lo_u32 v3, v1, v2 -; GFX1250-NEXT: v_mad_u32 v0, ttmp9, s2, v0 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1250-NEXT: s_cselect_b32 s3, ttmp9, s4 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_mad_u32 v0, s3, s2, v0 ; GFX1250-NEXT: v_add_nc_u32_e32 v2, v3, v2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1250-NEXT: v_mul_lo_u32 v2, v2, v1 ; GFX1250-NEXT: v_mov_b32_e32 v1, 0 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1250-NEXT: v_add_nc_u64_e32 v[0:1], s[0:1], v[0:1] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250-NEXT: v_mad_u32 v3, v2, v3, v2 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX1250-NEXT: v_lshl_add_u64 
v[0:1], v[0:1], 2, s[8:9] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1250-NEXT: v_mad_u32 v2, v3, v2, v3 ; GFX1250-NEXT: global_store_b32 v[0:1], v2, off ; GFX1250-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll index 7a64e55abb8d3..afca83a7e1c36 100644 --- a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll @@ -1,8 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=0 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1200 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 < %s | FileCheck -check-prefixes=GFX1200 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel=0 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel=1 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) { ; @@ -15,6 +17,50 @@ define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) { ; GFX9-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm ; +; 
GFX1200-LABEL: workgroup_id_x: +; GFX1200: ; %bb.0: +; GFX1200-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; GFX1200-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0 +; GFX1200-NEXT: s_wait_kmcnt 0x0 +; GFX1200-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1200-NEXT: s_endpgm +; +; GFX1250-SDAG-LABEL: workgroup_id_x: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; GFX1250-SDAG-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c +; GFX1250-SDAG-NEXT: s_and_b32 s3, ttmp6, 15 +; GFX1250-SDAG-NEXT: s_add_co_i32 s2, s2, 1 +; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 +; GFX1250-SDAG-NEXT: s_getreg_b32 s4, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-SDAG-NEXT: s_mul_i32 s2, ttmp9, s2 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: s_add_co_i32 s3, s3, s2 +; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX1250-SDAG-NEXT: s_cselect_b32 s2, ttmp9, s3 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: workgroup_id_x: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 +; GFX1250-GISEL-NEXT: s_bfe_u32 s2, ttmp6, 0x4000c +; GFX1250-GISEL-NEXT: s_and_b32 s3, ttmp6, 15 +; GFX1250-GISEL-NEXT: s_add_co_i32 s2, s2, 1 +; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-NEXT: s_getreg_b32 s4, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-GISEL-NEXT: s_mul_i32 s2, ttmp9, s2 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-GISEL-NEXT: s_add_co_i32 s3, s3, s2 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s2, ttmp9, s3 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-GISEL-NEXT: s_endpgm ; GFX12-LABEL: workgroup_id_x: ; GFX12: ; %bb.0: 
; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 @@ -41,6 +87,74 @@ define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace ; GFX9-NEXT: global_store_dword v1, v2, s[2:3] ; GFX9-NEXT: s_endpgm ; +; GFX1200-LABEL: workgroup_id_xy: +; GFX1200: ; %bb.0: +; GFX1200-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX1200-NEXT: s_and_b32 s4, ttmp7, 0xffff +; GFX1200-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0 +; GFX1200-NEXT: v_mov_b32_e32 v2, s4 +; GFX1200-NEXT: s_wait_kmcnt 0x0 +; GFX1200-NEXT: s_clause 0x1 +; GFX1200-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1200-NEXT: global_store_b32 v1, v2, s[2:3] +; GFX1200-NEXT: s_endpgm +; +; GFX1250-SDAG-LABEL: workgroup_id_xy: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX1250-SDAG-NEXT: s_bfe_u32 s6, ttmp6, 0x40010 +; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 +; GFX1250-SDAG-NEXT: s_and_b32 s4, ttmp7, 0xffff +; GFX1250-SDAG-NEXT: s_add_co_i32 s6, s6, 1 +; GFX1250-SDAG-NEXT: s_bfe_u32 s7, ttmp6, 0x4000c +; GFX1250-SDAG-NEXT: s_mul_i32 s5, s4, s6 +; GFX1250-SDAG-NEXT: s_bfe_u32 s6, ttmp6, 0x40004 +; GFX1250-SDAG-NEXT: s_add_co_i32 s7, s7, 1 +; GFX1250-SDAG-NEXT: s_add_co_i32 s6, s6, s5 +; GFX1250-SDAG-NEXT: s_and_b32 s5, ttmp6, 15 +; GFX1250-SDAG-NEXT: s_mul_i32 s7, ttmp9, s7 +; GFX1250-SDAG-NEXT: s_getreg_b32 s8, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-SDAG-NEXT: s_add_co_i32 s5, s5, s7 +; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s8, 0 +; GFX1250-SDAG-NEXT: s_cselect_b32 s5, ttmp9, s5 +; GFX1250-SDAG-NEXT: s_cselect_b32 s4, s4, s6 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s5 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v2, s4 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_clause 0x1 +; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-SDAG-NEXT: global_store_b32 v0, v2, s[2:3] +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: workgroup_id_xy: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_bfe_u32 s6, ttmp6, 0x4000c +; 
GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX1250-GISEL-NEXT: s_add_co_i32 s6, s6, 1 +; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-NEXT: s_and_b32 s4, ttmp6, 15 +; GFX1250-GISEL-NEXT: s_mul_i32 s5, ttmp9, s6 +; GFX1250-GISEL-NEXT: s_getreg_b32 s6, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-GISEL-NEXT: s_add_co_i32 s4, s4, s5 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s4, ttmp9, s4 +; GFX1250-GISEL-NEXT: s_bfe_u32 s5, ttmp6, 0x40010 +; GFX1250-GISEL-NEXT: s_and_b32 s7, ttmp7, 0xffff +; GFX1250-GISEL-NEXT: s_add_co_i32 s5, s5, 1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s8, ttmp6, 0x40004 +; GFX1250-GISEL-NEXT: s_mul_i32 s5, s7, s5 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, s4 +; GFX1250-GISEL-NEXT: s_add_co_i32 s8, s8, s5 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s4, s7, s8 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v2, s4 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_clause 0x1 +; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-GISEL-NEXT: global_store_b32 v1, v2, s[2:3] +; GFX1250-GISEL-NEXT: s_endpgm ; GFX12-LABEL: workgroup_id_xy: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 @@ -77,6 +191,99 @@ define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspac ; GFX9-NEXT: global_store_dword v1, v0, s[4:5] ; GFX9-NEXT: s_endpgm ; +; GFX1200-LABEL: workgroup_id_xyz: +; GFX1200: ; %bb.0: +; GFX1200-NEXT: s_clause 0x1 +; GFX1200-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX1200-NEXT: s_load_b64 s[4:5], s[4:5], 0x10 +; GFX1200-NEXT: s_and_b32 s6, ttmp7, 0xffff +; GFX1200-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0 +; GFX1200-NEXT: s_lshr_b32 s7, ttmp7, 16 +; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1200-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7 +; GFX1200-NEXT: s_wait_kmcnt 0x0 +; GFX1200-NEXT: 
s_clause 0x2 +; GFX1200-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1200-NEXT: global_store_b32 v1, v2, s[2:3] +; GFX1200-NEXT: global_store_b32 v1, v3, s[4:5] +; GFX1200-NEXT: s_endpgm +; +; GFX1250-SDAG-LABEL: workgroup_id_xyz: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_bfe_u32 s0, ttmp6, 0x40014 +; GFX1250-SDAG-NEXT: s_lshr_b32 s6, ttmp7, 16 +; GFX1250-SDAG-NEXT: s_add_co_i32 s7, s0, 1 +; GFX1250-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 +; GFX1250-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x10 +; GFX1250-SDAG-NEXT: s_bfe_u32 s9, ttmp6, 0x40010 +; GFX1250-SDAG-NEXT: s_mul_i32 s7, s6, s7 +; GFX1250-SDAG-NEXT: s_bfe_u32 s8, ttmp6, 0x40008 +; GFX1250-SDAG-NEXT: s_and_b32 s10, ttmp7, 0xffff +; GFX1250-SDAG-NEXT: s_add_co_i32 s9, s9, 1 +; GFX1250-SDAG-NEXT: s_bfe_u32 s11, ttmp6, 0x4000c +; GFX1250-SDAG-NEXT: s_add_co_i32 s8, s8, s7 +; GFX1250-SDAG-NEXT: s_mul_i32 s7, s10, s9 +; GFX1250-SDAG-NEXT: s_bfe_u32 s9, ttmp6, 0x40004 +; GFX1250-SDAG-NEXT: s_add_co_i32 s11, s11, 1 +; GFX1250-SDAG-NEXT: s_add_co_i32 s9, s9, s7 +; GFX1250-SDAG-NEXT: s_and_b32 s7, ttmp6, 15 +; GFX1250-SDAG-NEXT: s_mul_i32 s11, ttmp9, s11 +; GFX1250-SDAG-NEXT: s_getreg_b32 s12, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-SDAG-NEXT: s_add_co_i32 s7, s7, s11 +; GFX1250-SDAG-NEXT: s_cmp_eq_u32 s12, 0 +; GFX1250-SDAG-NEXT: s_cselect_b32 s7, ttmp9, s7 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s7 +; GFX1250-SDAG-NEXT: s_cselect_b32 s7, s10, s9 +; GFX1250-SDAG-NEXT: s_cselect_b32 s6, s6, s8 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s6 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: s_clause 0x2 +; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-SDAG-NEXT: global_store_b32 v0, v2, s[2:3] +; GFX1250-SDAG-NEXT: global_store_b32 v0, v3, s[4:5] +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: 
workgroup_id_xyz: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x4000c +; GFX1250-GISEL-NEXT: s_and_b32 s1, ttmp6, 15 +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-GISEL-NEXT: s_getreg_b32 s6, hwreg(HW_REG_IB_STS2, 6, 4) +; GFX1250-GISEL-NEXT: s_mul_i32 s0, ttmp9, s0 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-GISEL-NEXT: s_add_co_i32 s1, s1, s0 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s7, ttmp9, s1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s0, ttmp6, 0x40010 +; GFX1250-GISEL-NEXT: s_and_b32 s8, ttmp7, 0xffff +; GFX1250-GISEL-NEXT: s_add_co_i32 s0, s0, 1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s9, ttmp6, 0x40004 +; GFX1250-GISEL-NEXT: s_mul_i32 s10, s8, s0 +; GFX1250-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 +; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x10 +; GFX1250-GISEL-NEXT: s_add_co_i32 s9, s9, s10 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0 +; GFX1250-GISEL-NEXT: v_mov_b32_e32 v0, s7 +; GFX1250-GISEL-NEXT: s_cselect_b32 s8, s8, s9 +; GFX1250-GISEL-NEXT: s_bfe_u32 s9, ttmp6, 0x40014 +; GFX1250-GISEL-NEXT: s_lshr_b32 s10, ttmp7, 16 +; GFX1250-GISEL-NEXT: s_add_co_i32 s9, s9, 1 +; GFX1250-GISEL-NEXT: s_bfe_u32 s11, ttmp6, 0x40008 +; GFX1250-GISEL-NEXT: s_mul_i32 s9, s10, s9 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-NEXT: s_add_co_i32 s11, s11, s9 +; GFX1250-GISEL-NEXT: s_cmp_eq_u32 s6, 0 +; GFX1250-GISEL-NEXT: s_cselect_b32 s6, s10, s11 +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s8 :: v_dual_mov_b32 v3, s6 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: s_clause 0x2 +; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-GISEL-NEXT: global_store_b32 v1, v2, s[2:3] +; GFX1250-GISEL-NEXT: global_store_b32 v1, v3, s[4:5] +; GFX1250-GISEL-NEXT: s_endpgm ; GFX12-LABEL: workgroup_id_xyz: ; GFX12: ; %bb.0: ; GFX12-NEXT: s_clause 0x1 @@ -107,7 +314,6 @@ declare 
i32 @llvm.amdgcn.workgroup.id.x() declare i32 @llvm.amdgcn.workgroup.id.y() declare i32 @llvm.amdgcn.workgroup.id.z() ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GFX12-GISEL: {{.*}} -; GFX12-SDAG: {{.*}} +; GFX1250: {{.*}} ; GFX9-GISEL: {{.*}} ; GFX9-SDAG: {{.*}}