|
31 | 31 |
|
32 | 32 | using namespace llvm; |
33 | 33 |
|
| 34 | +static cl::opt<bool> |
| 35 | + EnableAggressive("amdgpu-remat-enable-hot-block-remat-aggressive"); |
34 | 36 | static cl::opt<unsigned> TargetOccupancy("amdgpu-remat-target-occupancy"); |
35 | 37 |
|
36 | 38 | namespace { |
@@ -723,6 +725,12 @@ int rematGain(MachineInstr *DefMI, unsigned Reg, const MachineRegisterInfo &MRI, |
723 | 725 | if (IsSingleDef) { |
724 | 726 |       // The reg might share with other candidates, check it here.
725 | 727 | // Count share reg in getReducedSize. |
| 728 | + if (EnableAggressive) { |
|  729 | +      // In case of aggressive remat, treat a multi-use reg as a shared reg and
| 730 | + // ignore size of shared reg. |
| 731 | + if (!MRI.hasOneNonDBGUse(Reg)) |
| 732 | + continue; |
| 733 | + } |
726 | 734 | const TargetRegisterClass *OpRC = MRI.getRegClass(Reg); |
727 | 735 | if (unsigned SubIdx = MO.getSubReg()) { |
728 | 736 | if (OpRC) |
@@ -1253,6 +1261,9 @@ bool AMDGPUHotBlockRematerialize::hotBlockRemat(MachineFunction &MF, MachineLoop |
1253 | 1261 | unsigned SLimit = Status.TargetSLimit; |
1254 | 1262 |
|
1255 | 1263 | int RematSCnt = Status.MaxSPressure - SLimit; |
| 1264 | +  // When doing aggressive SGPR remat, reserve some headroom for registers lost during allocation.
| 1265 | + if (EnableAggressive) |
| 1266 | + RematSCnt += NearTargetRegLimit; |
1256 | 1267 |
|
1257 | 1268 | bool IsSGPRSpill = false; |
1258 | 1269 | if (RematSCnt > 0) { |
@@ -1367,7 +1378,7 @@ bool AMDGPUHotBlockRematerialize::hotBlockRemat(MachineFunction &MF, MachineLoop |
1367 | 1378 | for (RematNode &Node : SRematList) { |
1368 | 1379 | SRematMap[Node.Reg] = Node; |
1369 | 1380 | RematCnt += Node.Size; |
1370 | | - if (RematCnt > RematSCnt) |
| 1381 | + if (RematCnt > RematSCnt && !EnableAggressive) |
1371 | 1382 | break; |
1372 | 1383 | } |
1373 | 1384 | NewRematSCnt = 0; |
|
0 commit comments