- 
                Notifications
    You must be signed in to change notification settings 
- Fork 15k
[AMDGPU] Use table strategy for LowerModuleLDSPass at O0 #160181
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
edf3d4f
              0544d57
              a31054a
              e8459cf
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|  | @@ -588,14 +588,17 @@ class AMDGPULowerModuleLDS { | |||||
| return OrderedKernels; | ||||||
| } | ||||||
|  | ||||||
| static void partitionVariablesIntoIndirectStrategies( | ||||||
| void partitionVariablesIntoIndirectStrategies( | ||||||
| Module &M, LDSUsesInfoTy const &LDSUsesInfo, | ||||||
| VariableFunctionMap &LDSToKernelsThatNeedToAccessItIndirectly, | ||||||
| DenseSet<GlobalVariable *> &ModuleScopeVariables, | ||||||
| DenseSet<GlobalVariable *> &TableLookupVariables, | ||||||
| DenseSet<GlobalVariable *> &KernelAccessVariables, | ||||||
| DenseSet<GlobalVariable *> &DynamicVariables) { | ||||||
|  | ||||||
| if (TM.getOptLevel() == CodeGenOptLevel::None) | ||||||
| LoweringKindLoc = LoweringKind::table; | ||||||
|  | ||||||
| GlobalVariable *HybridModuleRoot = | ||||||
| LoweringKindLoc != LoweringKind::hybrid | ||||||
| ? nullptr | ||||||
|  | @@ -1188,13 +1191,17 @@ class AMDGPULowerModuleLDS { | |||||
| // Allocated at zero, recorded once on construction, not once per | ||||||
| // kernel | ||||||
| Offset += DL.getTypeAllocSize(MaybeModuleScopeStruct->getValueType()); | ||||||
| LLVM_DEBUG(dbgs() << "amdgpu-lds-size after ModuleScopeStruct" | ||||||
| << Offset << "\n"); | ||||||
| } | ||||||
|  | ||||||
| if (AllocateKernelScopeStruct) { | ||||||
| GlobalVariable *KernelStruct = Replacement->second.SGV; | ||||||
| Offset = alignTo(Offset, AMDGPU::getAlign(DL, KernelStruct)); | ||||||
| recordLDSAbsoluteAddress(&M, KernelStruct, Offset); | ||||||
| Offset += DL.getTypeAllocSize(KernelStruct->getValueType()); | ||||||
| LLVM_DEBUG(dbgs() | ||||||
| << "amdgpu-lds-size after KernelStruct" << Offset << "\n"); | ||||||
|          | ||||||
| << "amdgpu-lds-size after KernelStruct" << Offset << "\n"); | |
| << "amdgpu-lds-size after KernelStruct" << Offset << '\n'); | 
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,92 @@ | ||||||||||||||
| ; RUN: not llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=null < %s 2>&1 | FileCheck --check-prefix=CHECK %s | ||||||||||||||
| ; CHECK-NOT: error: <unknown>:0:0: local memory (98304) exceeds limit (65536) in function 'k2' | ||||||||||||||
|          | ||||||||||||||
|  | ||||||||||||||
| @gA = internal addrspace(3) global [32768 x i8] undef, align 4 | ||||||||||||||
| @gB = internal addrspace(3) global [32768 x i8] undef, align 4 | ||||||||||||||
| @gC = internal addrspace(3) global [32768 x i8] undef, align 4 | ||||||||||||||
|          | ||||||||||||||
| @gA = internal addrspace(3) global [32768 x i8] undef, align 4 | |
| @gB = internal addrspace(3) global [32768 x i8] undef, align 4 | |
| @gC = internal addrspace(3) global [32768 x i8] undef, align 4 | |
| @gA = internal addrspace(3) global [32768 x i8] poison, align 4 | |
| @gB = internal addrspace(3) global [32768 x i8] poison, align 4 | |
| @gC = internal addrspace(3) global [32768 x i8] poison, align 4 | 
        
          
              
                Outdated
          
        
      There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove all the inlinehint attributes; they aren't doing anything.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should not override the explicit flag. This also seems like a dubious way to avoid going over the limit; we can't rely on other optimizations without -O0 either. Is it possible to compute the size usage with the different strategies before committing to one?