@@ -622,12 +622,13 @@ AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF,
622622
623623 int64_t PGRM_Rsrc3 = 1 ;
624624 bool EvaluatableRsrc3 =
625- CurrentProgramInfo.ComputePGMRSrc3GFX90A ->evaluateAsAbsolute (PGRM_Rsrc3);
625+ CurrentProgramInfo.ComputePGMRSrc3 ->evaluateAsAbsolute (PGRM_Rsrc3);
626626 (void )PGRM_Rsrc3;
627627 (void )EvaluatableRsrc3;
628- assert (STM.hasGFX90AInsts () || !EvaluatableRsrc3 ||
628+ assert (STM.getGeneration () >= AMDGPUSubtarget::GFX10 ||
629+ STM.hasGFX90AInsts () || !EvaluatableRsrc3 ||
629630 static_cast <uint64_t >(PGRM_Rsrc3) == 0 );
630- KernelDescriptor.compute_pgm_rsrc3 = CurrentProgramInfo.ComputePGMRSrc3GFX90A ;
631+ KernelDescriptor.compute_pgm_rsrc3 = CurrentProgramInfo.ComputePGMRSrc3 ;
631632
632633 KernelDescriptor.kernarg_preload = MCConstantExpr::create (
633634 AMDGPU::hasKernargPreload (STM) ? Info->getNumKernargPreloadedSGPRs () : 0 ,
@@ -748,7 +749,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
748749 RI.getSymbol (CurrentFnSym->getName (), RIK::RIK_PrivateSegSize,
749750 OutContext, IsLocal)
750751 ->getVariableValue (),
751- getFunctionCodeSize (MF), MFI);
752+ CurrentProgramInfo. getFunctionCodeSize (MF), MFI);
752753 return false ;
753754 }
754755
@@ -757,7 +758,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
757758 CurrentProgramInfo.NumArchVGPR ,
758759 STM.hasMAIInsts () ? CurrentProgramInfo.NumAccVGPR : nullptr ,
759760 CurrentProgramInfo.NumVGPR , CurrentProgramInfo.NumSGPR ,
760- CurrentProgramInfo.ScratchSize , getFunctionCodeSize (MF), MFI);
761+ CurrentProgramInfo.ScratchSize ,
762+ CurrentProgramInfo.getFunctionCodeSize (MF), MFI);
761763
762764 OutStreamer->emitRawComment (
763765 " FloatMode: " + Twine (CurrentProgramInfo.FloatMode ), false );
@@ -821,22 +823,22 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
821823 false );
822824
823825 [[maybe_unused]] int64_t PGMRSrc3;
824- assert (STM.hasGFX90AInsts () ||
825- (CurrentProgramInfo. ComputePGMRSrc3GFX90A -> evaluateAsAbsolute (
826- PGMRSrc3) &&
826+ assert (STM.getGeneration () >= AMDGPUSubtarget::GFX10 ||
827+ STM. hasGFX90AInsts () ||
828+ (CurrentProgramInfo. ComputePGMRSrc3 -> evaluateAsAbsolute ( PGMRSrc3) &&
827829 static_cast <uint64_t >(PGMRSrc3) == 0 ));
828830 if (STM.hasGFX90AInsts ()) {
829831 OutStreamer->emitRawComment (
830832 " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
831833 getMCExprStr (MCKernelDescriptor::bits_get (
832- CurrentProgramInfo.ComputePGMRSrc3GFX90A ,
834+ CurrentProgramInfo.ComputePGMRSrc3 ,
833835 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
834836 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, Ctx)),
835837 false );
836838 OutStreamer->emitRawComment (
837839 " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
838840 getMCExprStr (MCKernelDescriptor::bits_get (
839- CurrentProgramInfo.ComputePGMRSrc3GFX90A ,
841+ CurrentProgramInfo.ComputePGMRSrc3 ,
840842 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
841843 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx)),
842844 false );
@@ -893,27 +895,6 @@ void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
893895 }
894896}
895897
896- uint64_t AMDGPUAsmPrinter::getFunctionCodeSize (const MachineFunction &MF) const { // Returns the summed byte size of every non-debug instruction in MF.
897- const GCNSubtarget &STM = MF.getSubtarget <GCNSubtarget>();
898- const SIInstrInfo *TII = STM.getInstrInfo (); // per-instruction sizes are queried from the subtarget's instruction info
899-
900- uint64_t CodeSize = 0 ;
901-
902- for (const MachineBasicBlock &MBB : MF) {
903- for (const MachineInstr &MI : MBB) {
904- // TODO: CodeSize should account for multiple functions; only MF's own instructions are counted here.
905-
906- // TODO: Should we count size of debug info? Debug instructions are currently skipped.
907- if (MI.isDebugInstr ())
908- continue ;
909-
910- CodeSize += TII->getInstSizeInBytes (MI); // accumulate the size in bytes reported by TII
911- }
912- }
913-
914- return CodeSize;
915- }
916-
917898// AccumOffset computed for the MCExpr equivalent of:
918899// alignTo(std::max(1, NumVGPR), 4) / 4 - 1;
919900static const MCExpr *computeAccumOffset (const MCExpr *NumVGPR, MCContext &Ctx) {
@@ -1249,24 +1230,24 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
12491230 ProgInfo.LdsSize = STM.isAmdHsaOS () ? 0 : ProgInfo.LDSBlocks ;
12501231 ProgInfo.EXCPEnable = 0 ;
12511232
1233+ // Builds and returns the MCExpr for ((Dst & ~Mask) | (Value << Shift)). Value itself is not masked here, so it is expected to already fit the field. Declared outside the GFX90A branch so the GFX11+ INST_PREF_SIZE update below can use it too.
1234+ auto SetBits = [&Ctx](const MCExpr *Dst, const MCExpr *Value, uint32_t Mask,
1235+ uint32_t Shift) {
1236+ const auto *Shft = MCConstantExpr::create (Shift, Ctx);
1237+ const auto *Msk = MCConstantExpr::create (Mask, Ctx);
1238+ Dst = MCBinaryExpr::createAnd (Dst, MCUnaryExpr::createNot (Msk, Ctx), Ctx); // clear the bits selected by Mask
1239+ Dst = MCBinaryExpr::createOr (Dst, MCBinaryExpr::createShl (Value, Shft, Ctx),
1240+ Ctx); // OR in Value shifted left by Shift
1241+ return Dst;
1242+ };
1243+
12521244 if (STM.hasGFX90AInsts ()) {
1253- // return ((Dst & ~Mask) | (Value << Shift))
1254- auto SetBits = [&Ctx](const MCExpr *Dst, const MCExpr *Value, uint32_t Mask,
1255- uint32_t Shift) {
1256- const auto *Shft = MCConstantExpr::create (Shift, Ctx);
1257- const auto *Msk = MCConstantExpr::create (Mask, Ctx);
1258- Dst = MCBinaryExpr::createAnd (Dst, MCUnaryExpr::createNot (Msk, Ctx), Ctx);
1259- Dst = MCBinaryExpr::createOr (
1260- Dst, MCBinaryExpr::createShl (Value, Shft, Ctx), Ctx);
1261- return Dst;
1262- };
1263-
1264- ProgInfo.ComputePGMRSrc3GFX90A =
1265- SetBits (ProgInfo.ComputePGMRSrc3GFX90A , ProgInfo.AccumOffset ,
1245+ ProgInfo.ComputePGMRSrc3 =
1246+ SetBits (ProgInfo.ComputePGMRSrc3 , ProgInfo.AccumOffset ,
12661247 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
12671248 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT);
1268- ProgInfo.ComputePGMRSrc3GFX90A =
1269- SetBits (ProgInfo.ComputePGMRSrc3GFX90A , CreateExpr (ProgInfo.TgSplit ),
1249+ ProgInfo.ComputePGMRSrc3 =
1250+ SetBits (ProgInfo.ComputePGMRSrc3 , CreateExpr (ProgInfo.TgSplit ),
12701251 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
12711252 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT);
12721253 }
@@ -1287,6 +1268,26 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
12871268 " , final occupancy is " + Twine (Occupancy));
12881269 F.getContext ().diagnose (Diag);
12891270 }
1271+
1272+ if (isGFX11Plus (STM)) { // GFX11+: store the function's code size in the INST_PREF_SIZE field of ComputePGMRSrc3
1273+ uint32_t CodeSizeInBytes =
1274+ (uint32_t )std::min (ProgInfo.getFunctionCodeSize (MF),
1275+ (uint64_t )std::numeric_limits<uint32_t >::max ()); // saturate the 64-bit size at UINT32_MAX before narrowing
1276+ uint32_t CodeSizeInLines = divideCeil (CodeSizeInBytes, 128 ); // size in 128-byte units, rounded up
1277+ uint32_t Field, Shift, Width; // all three are assigned by exactly one branch below
1278+ if (isGFX11 (STM)) { // GFX11 uses its own field constants
1279+ Field = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE;
1280+ Shift = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT;
1281+ Width = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_WIDTH;
1282+ } else { // every other GFX11+ target uses the GFX12_PLUS constants
1283+ Field = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE;
1284+ Shift = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT;
1285+ Width = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_WIDTH;
1286+ }
1287+ uint64_t InstPrefSize = std::min (CodeSizeInLines, (1u << Width) - 1 ); // clamp to the largest value a Width-bit field can hold
1288+ ProgInfo.ComputePGMRSrc3 = SetBits (ProgInfo.ComputePGMRSrc3 ,
1289+ CreateExpr (InstPrefSize), Field, Shift); // Field is passed as SetBits' Mask argument, so only that field is rewritten
1290+ }
12901291}
12911292
12921293static unsigned getRsrcReg (CallingConv::ID CallConv) {
0 commit comments