@@ -1230,18 +1230,18 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
1230
1230
ProgInfo.LdsSize = STM.isAmdHsaOS () ? 0 : ProgInfo.LDSBlocks ;
1231
1231
ProgInfo.EXCPEnable = 0 ;
1232
1232
1233
- if (STM.hasGFX90AInsts ()) {
1234
- // return ((Dst & ~Mask) | (Value << Shift))
1235
- auto SetBits = [&Ctx](const MCExpr *Dst, const MCExpr *Value, uint32_t Mask,
1236
- uint32_t Shift) {
1237
- const auto *Shft = MCConstantExpr::create (Shift, Ctx);
1238
- const auto *Msk = MCConstantExpr::create (Mask, Ctx);
1239
- Dst = MCBinaryExpr::createAnd (Dst, MCUnaryExpr::createNot (Msk, Ctx), Ctx);
1240
- Dst = MCBinaryExpr::createOr (
1241
- Dst, MCBinaryExpr::createShl (Value, Shft, Ctx), Ctx);
1242
- return Dst;
1243
- };
1233
+ // return ((Dst & ~Mask) | (Value << Shift))
1234
+ auto SetBits = [&Ctx](const MCExpr *Dst, const MCExpr *Value, uint32_t Mask,
1235
+ uint32_t Shift) {
1236
+ const auto *Shft = MCConstantExpr::create (Shift, Ctx);
1237
+ const auto *Msk = MCConstantExpr::create (Mask, Ctx);
1238
+ Dst = MCBinaryExpr::createAnd (Dst, MCUnaryExpr::createNot (Msk, Ctx), Ctx);
1239
+ Dst = MCBinaryExpr::createOr (Dst, MCBinaryExpr::createShl (Value, Shft, Ctx),
1240
+ Ctx);
1241
+ return Dst;
1242
+ };
1244
1243
1244
+ if (STM.hasGFX90AInsts ()) {
1245
1245
ProgInfo.ComputePGMRSrc3 =
1246
1246
SetBits (ProgInfo.ComputePGMRSrc3 , ProgInfo.AccumOffset ,
1247
1247
amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
@@ -1268,6 +1268,26 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
1268
1268
" , final occupancy is " + Twine (Occupancy));
1269
1269
F.getContext ().diagnose (Diag);
1270
1270
}
1271
+
1272
+ if (isGFX11Plus (STM)) {
1273
+ uint32_t CodeSizeInBytes = (uint32_t )std::min (
1274
+ ProgInfo.getFunctionCodeSize (MF, true /* IsLowerBound */ ),
1275
+ (uint64_t )std::numeric_limits<uint32_t >::max ());
1276
+ uint32_t CodeSizeInLines = divideCeil (CodeSizeInBytes, 128 );
1277
+ uint32_t Field, Shift, Width;
1278
+ if (isGFX11 (STM)) {
1279
+ Field = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE;
1280
+ Shift = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT;
1281
+ Width = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_WIDTH;
1282
+ } else {
1283
+ Field = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE;
1284
+ Shift = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT;
1285
+ Width = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_WIDTH;
1286
+ }
1287
+ uint64_t InstPrefSize = std::min (CodeSizeInLines, (1u << Width) - 1 );
1288
+ ProgInfo.ComputePGMRSrc3 = SetBits (ProgInfo.ComputePGMRSrc3 ,
1289
+ CreateExpr (InstPrefSize), Field, Shift);
1290
+ }
1271
1291
}
1272
1292
1273
1293
static unsigned getRsrcReg (CallingConv::ID CallConv) {
0 commit comments