@@ -622,12 +622,13 @@ AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF,
622622
623623 int64_t PGRM_Rsrc3 = 1 ;
624624 bool EvaluatableRsrc3 =
625- CurrentProgramInfo.ComputePGMRSrc3GFX90A ->evaluateAsAbsolute (PGRM_Rsrc3);
625+ CurrentProgramInfo.ComputePGMRSrc3 ->evaluateAsAbsolute (PGRM_Rsrc3);
626626 (void )PGRM_Rsrc3;
627627 (void )EvaluatableRsrc3;
628- assert (STM.hasGFX90AInsts () || !EvaluatableRsrc3 ||
628+ assert (STM.getGeneration () >= AMDGPUSubtarget::GFX10 ||
629+ STM.hasGFX90AInsts () || !EvaluatableRsrc3 ||
629630 static_cast <uint64_t >(PGRM_Rsrc3) == 0 );
630- KernelDescriptor.compute_pgm_rsrc3 = CurrentProgramInfo.ComputePGMRSrc3GFX90A ;
631+ KernelDescriptor.compute_pgm_rsrc3 = CurrentProgramInfo.ComputePGMRSrc3 ;
631632
632633 KernelDescriptor.kernarg_preload = MCConstantExpr::create (
633634 AMDGPU::hasKernargPreload (STM) ? Info->getNumKernargPreloadedSGPRs () : 0 ,
@@ -822,22 +823,22 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
822823 false );
823824
824825 [[maybe_unused]] int64_t PGMRSrc3;
825- assert (STM.hasGFX90AInsts () ||
826- (CurrentProgramInfo. ComputePGMRSrc3GFX90A -> evaluateAsAbsolute (
827- PGMRSrc3) &&
826+ assert (STM.getGeneration () >= AMDGPUSubtarget::GFX10 ||
827+ STM. hasGFX90AInsts () ||
828+ (CurrentProgramInfo. ComputePGMRSrc3 -> evaluateAsAbsolute ( PGMRSrc3) &&
828829 static_cast <uint64_t >(PGMRSrc3) == 0 ));
829830 if (STM.hasGFX90AInsts ()) {
830831 OutStreamer->emitRawComment (
831832 " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
832833 getMCExprStr (MCKernelDescriptor::bits_get (
833- CurrentProgramInfo.ComputePGMRSrc3GFX90A ,
834+ CurrentProgramInfo.ComputePGMRSrc3 ,
834835 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
835836 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, Ctx)),
836837 false );
837838 OutStreamer->emitRawComment (
838839 " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
839840 getMCExprStr (MCKernelDescriptor::bits_get (
840- CurrentProgramInfo.ComputePGMRSrc3GFX90A ,
841+ CurrentProgramInfo.ComputePGMRSrc3 ,
841842 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
842843 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx)),
843844 false );
@@ -1229,24 +1230,24 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
12291230 ProgInfo.LdsSize = STM.isAmdHsaOS () ? 0 : ProgInfo.LDSBlocks ;
12301231 ProgInfo.EXCPEnable = 0 ;
12311232
1233+ // return ((Dst & ~Mask) | (Value << Shift))
1234+ auto SetBits = [&Ctx](const MCExpr *Dst, const MCExpr *Value, uint32_t Mask,
1235+ uint32_t Shift) {
1236+ const auto *Shft = MCConstantExpr::create (Shift, Ctx);
1237+ const auto *Msk = MCConstantExpr::create (Mask, Ctx);
1238+ Dst = MCBinaryExpr::createAnd (Dst, MCUnaryExpr::createNot (Msk, Ctx), Ctx);
1239+ Dst = MCBinaryExpr::createOr (Dst, MCBinaryExpr::createShl (Value, Shft, Ctx),
1240+ Ctx);
1241+ return Dst;
1242+ };
1243+
12321244 if (STM.hasGFX90AInsts ()) {
1233- // return ((Dst & ~Mask) | (Value << Shift))
1234- auto SetBits = [&Ctx](const MCExpr *Dst, const MCExpr *Value, uint32_t Mask,
1235- uint32_t Shift) {
1236- const auto *Shft = MCConstantExpr::create (Shift, Ctx);
1237- const auto *Msk = MCConstantExpr::create (Mask, Ctx);
1238- Dst = MCBinaryExpr::createAnd (Dst, MCUnaryExpr::createNot (Msk, Ctx), Ctx);
1239- Dst = MCBinaryExpr::createOr (
1240- Dst, MCBinaryExpr::createShl (Value, Shft, Ctx), Ctx);
1241- return Dst;
1242- };
1243-
1244- ProgInfo.ComputePGMRSrc3GFX90A =
1245- SetBits (ProgInfo.ComputePGMRSrc3GFX90A , ProgInfo.AccumOffset ,
1245+ ProgInfo.ComputePGMRSrc3 =
1246+ SetBits (ProgInfo.ComputePGMRSrc3 , ProgInfo.AccumOffset ,
12461247 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
12471248 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT);
1248- ProgInfo.ComputePGMRSrc3GFX90A =
1249- SetBits (ProgInfo.ComputePGMRSrc3GFX90A , CreateExpr (ProgInfo.TgSplit ),
1249+ ProgInfo.ComputePGMRSrc3 =
1250+ SetBits (ProgInfo.ComputePGMRSrc3 , CreateExpr (ProgInfo.TgSplit ),
12501251 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
12511252 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT);
12521253 }
@@ -1267,6 +1268,26 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
12671268 " , final occupancy is " + Twine (Occupancy));
12681269 F.getContext ().diagnose (Diag);
12691270 }
1271+
1272+ if (isGFX11Plus (STM)) {
1273+ uint32_t CodeSizeInBytes =
1274+ (uint32_t )std::min (ProgInfo.getFunctionCodeSize (MF),
1275+ (uint64_t )std::numeric_limits<uint32_t >::max ());
1276+ uint32_t CodeSizeInLines = divideCeil (CodeSizeInBytes, 128 );
1277+ uint32_t Field, Shift, Width;
1278+ if (isGFX11 (STM)) {
1279+ Field = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE;
1280+ Shift = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT;
1281+ Width = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_WIDTH;
1282+ } else {
1283+ Field = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE;
1284+ Shift = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT;
1285+ Width = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_WIDTH;
1286+ }
1287+ uint64_t InstPrefSize = std::min (CodeSizeInLines, (1u << Width) - 1 );
1288+ ProgInfo.ComputePGMRSrc3 = SetBits (ProgInfo.ComputePGMRSrc3 ,
1289+ CreateExpr (InstPrefSize), Field, Shift);
1290+ }
12701291}
12711292
12721293static unsigned getRsrcReg (CallingConv::ID CallConv) {
0 commit comments