- 
                Notifications
    
You must be signed in to change notification settings  - Fork 15.1k
 
Limit Alloca->LDS promotion based on speculations as to eventual register pressure #152814
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
9822d9b
              7b86ccc
              1d3e1c1
              be47b63
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| 
          
            
          
           | 
    @@ -27,7 +27,9 @@ | |
| 
     | 
||
| #include "AMDGPU.h" | ||
| #include "GCNSubtarget.h" | ||
| #include "SIRegisterInfo.h" | ||
| #include "Utils/AMDGPUBaseInfo.h" | ||
| #include "llvm/ADT/PostOrderIterator.h" | ||
| #include "llvm/ADT/STLExtras.h" | ||
| #include "llvm/Analysis/CaptureTracking.h" | ||
| #include "llvm/Analysis/InstSimplifyFolder.h" | ||
| 
        
          
        
         | 
    @@ -36,6 +38,7 @@ | |
| #include "llvm/Analysis/ValueTracking.h" | ||
| #include "llvm/CodeGen/TargetPassConfig.h" | ||
| #include "llvm/IR/IRBuilder.h" | ||
| #include "llvm/IR/Instruction.h" | ||
| #include "llvm/IR/IntrinsicInst.h" | ||
| #include "llvm/IR/IntrinsicsAMDGPU.h" | ||
| #include "llvm/IR/IntrinsicsR600.h" | ||
| 
        
          
        
         | 
    @@ -45,6 +48,9 @@ | |
| #include "llvm/Target/TargetMachine.h" | ||
| #include "llvm/Transforms/Utils/SSAUpdater.h" | ||
| 
     | 
||
| #include <algorithm> | ||
| #include <unordered_set> | ||
| 
     | 
||
| #define DEBUG_TYPE "amdgpu-promote-alloca" | ||
| 
     | 
||
| using namespace llvm; | ||
| 
          
            
          
           | 
    @@ -100,6 +106,14 @@ class AMDGPUPromoteAllocaImpl { | |
| unsigned VGPRBudgetRatio; | ||
| unsigned MaxVectorRegs; | ||
| 
     | 
||
| std::unordered_map<BasicBlock *, std::unordered_set<Instruction *>> | ||
| SGPRLiveIns; | ||
| size_t getSGPRPressureEstimate(AllocaInst &I); | ||
| 
     | 
||
| std::unordered_map<BasicBlock *, std::unordered_set<Instruction *>> | ||
| VGPRLiveIns; | ||
| size_t getVGPRPressureEstimate(AllocaInst &I); | ||
| 
     | 
||
| bool IsAMDGCN = false; | ||
| bool IsAMDHSA = false; | ||
| 
     | 
||
| 
          
            
          
           | 
    @@ -1471,9 +1485,87 @@ bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(const Function &F) { | |
| return true; | ||
| } | ||
| 
     | 
||
| size_t AMDGPUPromoteAllocaImpl::getSGPRPressureEstimate(AllocaInst &I) { | ||
| Function &F = *I.getFunction(); | ||
| size_t MaxLive = 0; | ||
| for (BasicBlock *BB : post_order(&F)) { | ||
| if (SGPRLiveIns.count(BB)) | ||
                
      
                  linuxrocks123 marked this conversation as resolved.
               
              
                Outdated
          
            Show resolved
            Hide resolved
         | 
||
| continue; | ||
| 
     | 
||
| std::unordered_set<Instruction *> CurrentlyLive; | ||
| for (BasicBlock *SuccBB : successors(BB)) | ||
| if (SGPRLiveIns.count(SuccBB)) | ||
                
      
                  linuxrocks123 marked this conversation as resolved.
               
              
                Outdated
          
            Show resolved
            Hide resolved
         | 
||
| for (const auto &R : SGPRLiveIns[SuccBB]) | ||
| CurrentlyLive.insert(R); | ||
| 
     | 
||
| for (auto RIt = BB->rbegin(); RIt != BB->rend(); RIt++) { | ||
                
      
                  linuxrocks123 marked this conversation as resolved.
               
              
                Outdated
          
            Show resolved
            Hide resolved
         | 
||
| if (&*RIt == &I) | ||
| return MaxLive; | ||
| 
     | 
||
| MaxLive = std::max(MaxLive, CurrentlyLive.size()); | ||
| 
     | 
||
| for (auto &Op : RIt->operands()) | ||
| if (!Op.get()->getType()->isVectorTy()) | ||
| if (Instruction *U = dyn_cast<Instruction>(Op)) | ||
| CurrentlyLive.insert(U); | ||
| 
     | 
||
| if (!RIt->getType()->isVectorTy()) | ||
| 
         There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SGPR does not mean "not an IR vector type" There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @arsenm do you know of a better heuristic for estimating SGPR pressure at the IR level?  | 
||
| CurrentlyLive.erase(&*RIt); | ||
| } | ||
| 
     | 
||
| SGPRLiveIns[BB] = CurrentlyLive; | ||
| } | ||
| 
     | 
||
| llvm_unreachable("Woops, we fell off the edge of the world. Bye bye."); | ||
| } | ||
| 
     | 
||
| size_t AMDGPUPromoteAllocaImpl::getVGPRPressureEstimate(AllocaInst &I) { | ||
| 
         There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The IR has no knowledge of SGPRs or VGPRs, and you're missing out on all the pressures exposed by legalization  | 
||
| Function &F = *I.getParent()->getParent(); | ||
| size_t MaxLive = 0; | ||
| for (BasicBlock *BB : post_order(&F)) { | ||
| if (VGPRLiveIns.count(BB)) | ||
                
      
                  linuxrocks123 marked this conversation as resolved.
               
              
                Outdated
          
            Show resolved
            Hide resolved
         | 
||
| continue; | ||
| 
     | 
||
| std::unordered_set<Instruction *> CurrentlyLive; | ||
| for (BasicBlock *SuccBB : successors(BB)) | ||
| if (VGPRLiveIns.count(SuccBB)) | ||
                
      
                  linuxrocks123 marked this conversation as resolved.
               
              
                Outdated
          
            Show resolved
            Hide resolved
         | 
||
| for (const auto &R : VGPRLiveIns[SuccBB]) | ||
| CurrentlyLive.insert(R); | ||
| 
     | 
||
| for (auto RIt = BB->rbegin(); RIt != BB->rend(); RIt++) { | ||
| if (&*RIt == &I) | ||
| return MaxLive; | ||
| 
     | 
||
| MaxLive = std::max(MaxLive, CurrentlyLive.size()); | ||
| 
     | 
||
| for (auto &Op : RIt->operands()) | ||
| if (Op.get()->getType()->isVectorTy()) | ||
| if (Instruction *U = dyn_cast<Instruction>(Op)) | ||
| CurrentlyLive.insert(U); | ||
| 
     | 
||
| if (RIt->getType()->isVectorTy()) | ||
| 
         There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. VGPR doesn't mean "IR vector type" There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @arsenm do you know of a better heuristic for estimating VGPR pressure at the IR level? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, but the type has nothing to do with it. We don't have any real / precise attempts pressure heuristics in IR  | 
||
| CurrentlyLive.erase(&*RIt); | ||
| } | ||
| 
     | 
||
| VGPRLiveIns[BB] = CurrentlyLive; | ||
| } | ||
| 
     | 
||
| llvm_unreachable("Woops, we fell off the edge of the world. Bye bye."); | ||
| } | ||
| 
     | 
||
| // FIXME: Should try to pick the most likely to be profitable allocas first. | ||
| bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(AllocaInst &I, | ||
| bool SufficientLDS) { | ||
| const unsigned SGPRPressureLimit = AMDGPU::SGPR_32RegClass.getNumRegs(); | ||
| const unsigned VGPRPressureLimit = AMDGPU::VGPR_32RegClass.getNumRegs(); | ||
| 
     | 
||
| if (getSGPRPressureEstimate(I) > SGPRPressureLimit || | ||
| getVGPRPressureEstimate(I) > VGPRPressureLimit) { | ||
| LLVM_DEBUG(dbgs() << "Declining to promote " << I | ||
| << " to LDS since pressure is relatively high.\n"); | ||
| return false; | ||
| } | ||
| 
     | 
||
| LLVM_DEBUG(dbgs() << "Trying to promote to LDS: " << I << '\n'); | ||
| 
     | 
||
| if (DisablePromoteAllocaToLDS) { | ||
| 
          
            
          
           | 
    ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Depending on density of your map one could use
getNumber()onBasicBlockto get a unique number for mapping.E.g. this could be a SmallVector of sets with a size of F.getMaxBlockNumber().
For the set I'd suggest using a
SmallPtrSetinstead.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That may perform slightly better, but it would be harder to read. Perhaps it would be possible to encapsulate this algorithm into a set data structure somehow.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@tgymnich I can change it to SmallPtrSet for the value type, but the problem with using a vector is that I need to know whether the map contains a value. That would require making a vector of pointers to the SmallPtrSet instead of a vector of SmallPtrSets. I'm not sure if that would perform better than what I currently have.