@@ -452,15 +452,17 @@ void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
452452 unsigned MaxWaves = MFI.getMaxWavesPerEU ();
453453 uint64_t TotalNumVgpr =
454454 getTotalNumVGPRs (STM.hasGFX90AInsts (), NumAgpr, NumVgpr);
455- uint64_t NumVGPRsForWavesPerEU = std::max (
456- {TotalNumVgpr, (uint64_t )1 , (uint64_t )STM.getMinNumVGPRs (MaxWaves)});
455+ uint64_t NumVGPRsForWavesPerEU =
456+ std::max ({TotalNumVgpr, (uint64_t )1 ,
457+ (uint64_t )STM.getMinNumVGPRs (
458+ MaxWaves, MFI.getDynamicVGPRBlockSize ())});
457459 uint64_t NumSGPRsForWavesPerEU = std::max (
458460 {NumSgpr, (uint64_t )1 , (uint64_t )STM.getMinNumSGPRs (MaxWaves)});
459461 const MCExpr *OccupancyExpr = AMDGPUMCExpr::createOccupancy (
460462 STM.getOccupancyWithWorkGroupSizes (*MF).second ,
461463 MCConstantExpr::create (NumSGPRsForWavesPerEU, OutContext),
462- MCConstantExpr::create (NumVGPRsForWavesPerEU, OutContext), STM,
463- OutContext);
464+ MCConstantExpr::create (NumVGPRsForWavesPerEU, OutContext),
465+ MFI. getDynamicVGPRBlockSize (), STM, OutContext);
464466 uint64_t Occupancy;
465467
466468 const auto [MinWEU, MaxWEU] = AMDGPU::getIntegerPairAttribute (
@@ -1082,7 +1084,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
10821084 Ctx);
10831085 ProgInfo.NumVGPRsForWavesPerEU =
10841086 AMDGPUMCExpr::createMax ({ProgInfo.NumVGPR , CreateExpr (1ul ),
1085- CreateExpr (STM.getMinNumVGPRs (MaxWaves))},
1087+ CreateExpr (STM.getMinNumVGPRs (
1088+ MaxWaves, MFI->getDynamicVGPRBlockSize ()))},
10861089 Ctx);
10871090
10881091 if (STM.getGeneration () <= AMDGPUSubtarget::SEA_ISLANDS ||
@@ -1256,7 +1259,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
12561259
12571260 ProgInfo.Occupancy = AMDGPUMCExpr::createOccupancy (
12581261 STM.computeOccupancy (F, ProgInfo.LDSSize ).second ,
1259- ProgInfo.NumSGPRsForWavesPerEU , ProgInfo.NumVGPRsForWavesPerEU , STM, Ctx);
1262+ ProgInfo.NumSGPRsForWavesPerEU , ProgInfo.NumVGPRsForWavesPerEU ,
1263+ MFI->getDynamicVGPRBlockSize (), STM, Ctx);
12601264
12611265 const auto [MinWEU, MaxWEU] =
12621266 AMDGPU::getIntegerPairAttribute (F, " amdgpu-waves-per-eu" , {0 , 0 }, true );
@@ -1405,7 +1409,8 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
14051409// Helper function to add common PAL Metadata 3.0+
14061410static void EmitPALMetadataCommon (AMDGPUPALMetadata *MD,
14071411 const SIProgramInfo &CurrentProgramInfo,
1408- CallingConv::ID CC, const GCNSubtarget &ST) {
1412+ CallingConv::ID CC, const GCNSubtarget &ST,
1413+ unsigned DynamicVGPRBlockSize) {
14091414 if (ST.hasIEEEMode ())
14101415 MD->setHwStage (CC, " .ieee_mode" , (bool )CurrentProgramInfo.IEEEMode );
14111416
@@ -1417,7 +1422,7 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
14171422 (bool )CurrentProgramInfo.TrapHandlerEnable );
14181423 MD->setHwStage (CC, " .excp_en" , CurrentProgramInfo.EXCPEnable );
14191424
1420- if (ST. isDynamicVGPREnabled () )
1425+ if (DynamicVGPRBlockSize != 0 )
14211426 MD->setComputeRegisters (" .dynamic_vgpr_en" , true );
14221427 }
14231428
@@ -1444,7 +1449,7 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
14441449 // For targets that support dynamic VGPRs, set the number of saved dynamic
14451450 // VGPRs (if any) in the PAL metadata.
14461451 const GCNSubtarget &STM = MF.getSubtarget <GCNSubtarget>();
1447- if (STM. isDynamicVGPREnabled () &&
1452+ if (MFI-> isDynamicVGPREnabled () &&
14481453 MFI->getScratchReservedForDynamicVGPRs () > 0 )
14491454 MD->setHwStage (CC, " .dynamic_vgpr_saved_count" ,
14501455 MFI->getScratchReservedForDynamicVGPRs () / 4 );
@@ -1470,7 +1475,8 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
14701475 MD->setHwStage (CC, " .debug_mode" , (bool )CurrentProgramInfo.DebugMode );
14711476 MD->setHwStage (CC, " .scratch_en" , msgpack::Type::Boolean,
14721477 CurrentProgramInfo.ScratchEnable );
1473- EmitPALMetadataCommon (MD, CurrentProgramInfo, CC, STM);
1478+ EmitPALMetadataCommon (MD, CurrentProgramInfo, CC, STM,
1479+ MFI->getDynamicVGPRBlockSize ());
14741480 }
14751481
14761482 // ScratchSize is in bytes, 16 aligned.
@@ -1541,7 +1547,9 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
15411547 MD->setRsrc2 (CallingConv::AMDGPU_CS,
15421548 CurrentProgramInfo.getComputePGMRSrc2 (Ctx), Ctx);
15431549 } else {
1544- EmitPALMetadataCommon (MD, CurrentProgramInfo, CallingConv::AMDGPU_CS, ST);
1550+ EmitPALMetadataCommon (
1551+ MD, CurrentProgramInfo, CallingConv::AMDGPU_CS, ST,
1552+ MF.getInfo <SIMachineFunctionInfo>()->getDynamicVGPRBlockSize ());
15451553 }
15461554
15471555 // Set optional info
0 commit comments