@@ -997,89 +997,24 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
997997 const Function &F = MF.getFunction ();
998998
999999 // Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
1000- // dispatch registers are function args.
1001- unsigned WaveDispatchNumSGPR = 0 , WaveDispatchNumVGPR = 0 ;
1002-
1003- if (isShader (F.getCallingConv ())) {
1004- bool IsPixelShader =
1005- F.getCallingConv () == CallingConv::AMDGPU_PS && !STM.isAmdHsaOS ();
1006-
1007- // Calculate the number of VGPR registers based on the SPI input registers
1008- uint32_t InputEna = 0 ;
1009- uint32_t InputAddr = 0 ;
1010- unsigned LastEna = 0 ;
1011-
1012- if (IsPixelShader) {
1013- // Note for IsPixelShader:
1014- // By this stage, all enabled inputs are tagged in InputAddr as well.
1015- // We will use InputAddr to determine whether the input counts against the
1016- // vgpr total and only use the InputEnable to determine the last input
1017- // that is relevant - if extra arguments are used, then we have to honour
1018- // the InputAddr for any intermediate non-enabled inputs.
1019- InputEna = MFI->getPSInputEnable ();
1020- InputAddr = MFI->getPSInputAddr ();
1021-
1022- // We only need to consider input args up to the last used arg.
1023- assert ((InputEna || InputAddr) &&
1024- " PSInputAddr and PSInputEnable should "
1025- " never both be 0 for AMDGPU_PS shaders" );
1026- // There are some rare circumstances where InputAddr is non-zero and
1027- // InputEna can be set to 0. In this case we default to setting LastEna
1028- // to 1.
1029- LastEna = InputEna ? llvm::Log2_32 (InputEna) + 1 : 1 ;
1030- }
1000+ // dispatch registers are passed as function args.
1001+ unsigned WaveDispatchNumSGPR = MFI->getNumWaveDispatchSGPRs (),
1002+ WaveDispatchNumVGPR = MFI->getNumWaveDispatchVGPRs ();
10311003
1032- // FIXME: We should be using the number of registers determined during
1033- // calling convention lowering to legalize the types.
1034- const DataLayout &DL = F.getDataLayout ();
1035- unsigned PSArgCount = 0 ;
1036- unsigned IntermediateVGPR = 0 ;
1037- for (auto &Arg : F.args ()) {
1038- unsigned NumRegs = (DL.getTypeSizeInBits (Arg.getType ()) + 31 ) / 32 ;
1039- if (Arg.hasAttribute (Attribute::InReg)) {
1040- WaveDispatchNumSGPR += NumRegs;
1041- } else {
1042- // If this is a PS shader and we're processing the PS Input args (first
1043- // 16 VGPR), use the InputEna and InputAddr bits to define how many
1044- // VGPRs are actually used.
1045- // Any extra VGPR arguments are handled as normal arguments (and
1046- // contribute to the VGPR count whether they're used or not).
1047- if (IsPixelShader && PSArgCount < 16 ) {
1048- if ((1 << PSArgCount) & InputAddr) {
1049- if (PSArgCount < LastEna)
1050- WaveDispatchNumVGPR += NumRegs;
1051- else
1052- IntermediateVGPR += NumRegs;
1053- }
1054- PSArgCount++;
1055- } else {
1056- // If there are extra arguments we have to include the allocation for
1057- // the non-used (but enabled with InputAddr) input arguments
1058- if (IntermediateVGPR) {
1059- WaveDispatchNumVGPR += IntermediateVGPR;
1060- IntermediateVGPR = 0 ;
1061- }
1062- WaveDispatchNumVGPR += NumRegs;
1063- }
1064- }
1065- }
1004+ if (WaveDispatchNumSGPR) {
10661005 ProgInfo.NumSGPR = AMDGPUMCExpr::createMax (
1067- {ProgInfo.NumSGPR , CreateExpr (WaveDispatchNumSGPR)}, Ctx);
1006+ {ProgInfo.NumSGPR ,
1007+ MCBinaryExpr::createAdd (CreateExpr (WaveDispatchNumSGPR), ExtraSGPRs,
1008+ Ctx)},
1009+ Ctx);
1010+ }
10681011
1012+ if (WaveDispatchNumVGPR) {
10691013 ProgInfo.NumArchVGPR = AMDGPUMCExpr::createMax (
10701014 {ProgInfo.NumVGPR , CreateExpr (WaveDispatchNumVGPR)}, Ctx);
10711015
10721016 ProgInfo.NumVGPR = AMDGPUMCExpr::createTotalNumVGPR (
10731017 ProgInfo.NumAccVGPR , ProgInfo.NumArchVGPR , Ctx);
1074- } else if (isKernel (F.getCallingConv ()) &&
1075- MFI->getNumKernargPreloadedSGPRs ()) {
1076- // Consider cases where the total number of UserSGPRs with trailing
1077- // allocated preload SGPRs, is greater than the number of explicitly
1078- // referenced SGPRs.
1079- const MCExpr *UserPlusExtraSGPRs = MCBinaryExpr::createAdd (
1080- CreateExpr (MFI->getNumUserSGPRs ()), ExtraSGPRs, Ctx);
1081- ProgInfo.NumSGPR =
1082- AMDGPUMCExpr::createMax ({ProgInfo.NumSGPR , UserPlusExtraSGPRs}, Ctx);
10831018 }
10841019
10851020 // Adjust number of registers used to meet default/requested minimum/maximum
0 commit comments