Skip to content

Commit 33b0fc5

Browse files
[EXTERNAL] Undo changes in AMDGPUPromoteAlloca in order to unblock our CI (#2028)
Undo bf1fe016691ae10796d71f7db7753670c0f1d891
1 parent 0776656 commit 33b0fc5

File tree

6 files changed

+126
-470
lines changed

6 files changed

+126
-470
lines changed

external/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,7 @@ static cl::opt<unsigned> PromoteAllocaToVectorMaxRegs(
7070
"amdgpu-promote-alloca-to-vector-max-regs",
7171
cl::desc(
7272
"Maximum vector size (in 32b registers) to use when promoting alloca"),
73-
cl::init(32));
73+
cl::init(16));
7474

7575
// Use up to 1/4 of available register budget for vectorization.
7676
// FIXME: Increase the limit for whole function budgets? Perhaps x2?
@@ -287,12 +287,8 @@ void AMDGPUPromoteAllocaImpl::sortAllocasToPromote(
287287

288288
void AMDGPUPromoteAllocaImpl::setFunctionLimits(const Function &F) {
289289
// Load per function limits, overriding with global options where appropriate.
290-
// R600 register tuples/aliasing are fragile with large vector promotions so
291-
// apply architecture specific limit here.
292-
const int R600MaxVectorRegs = 16;
293290
MaxVectorRegs = F.getFnAttributeAsParsedInteger(
294-
"amdgpu-promote-alloca-to-vector-max-regs",
295-
IsAMDGCN ? PromoteAllocaToVectorMaxRegs : R600MaxVectorRegs);
291+
"amdgpu-promote-alloca-to-vector-max-regs", PromoteAllocaToVectorMaxRegs);
296292
if (PromoteAllocaToVectorMaxRegs.getNumOccurrences())
297293
MaxVectorRegs = PromoteAllocaToVectorMaxRegs;
298294
VGPRBudgetRatio = F.getFnAttributeAsParsedInteger(

external/llvm-project/llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll

Lines changed: 6 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -154,31 +154,26 @@ define amdgpu_cs void @realign_stack(<32 x i32> %x) #0 {
154154
; CHECK-LABEL: realign_stack:
155155
; CHECK: ; %bb.0:
156156
; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2)
157-
; CHECK-NEXT: v_mov_b32_e32 v32, 0
158-
; CHECK-NEXT: s_cmp_lg_u32 0, s33
159157
; CHECK-NEXT: s_mov_b32 s1, callee@abs32@hi
160-
; CHECK-NEXT: s_cmovk_i32 s33, 0x200
158+
; CHECK-NEXT: s_cmp_lg_u32 0, s33
161159
; CHECK-NEXT: s_mov_b32 s0, callee@abs32@lo
162-
; CHECK-NEXT: scratch_store_b32 off, v32, s33 scope:SCOPE_SYS
163-
; CHECK-NEXT: s_wait_storecnt 0x0
160+
; CHECK-NEXT: s_cmovk_i32 s33, 0x200
161+
; CHECK-NEXT: s_movk_i32 s32, 0x100
164162
; CHECK-NEXT: s_clause 0x7
165-
; CHECK-NEXT: scratch_store_b128 off, v[24:27], s33 offset:96
166163
; CHECK-NEXT: scratch_store_b128 off, v[28:31], s33 offset:112
167-
; CHECK-NEXT: scratch_store_b128 off, v[16:19], s33 offset:64
164+
; CHECK-NEXT: scratch_store_b128 off, v[24:27], s33 offset:96
168165
; CHECK-NEXT: scratch_store_b128 off, v[20:23], s33 offset:80
169-
; CHECK-NEXT: scratch_store_b128 off, v[8:11], s33 offset:32
166+
; CHECK-NEXT: scratch_store_b128 off, v[16:19], s33 offset:64
170167
; CHECK-NEXT: scratch_store_b128 off, v[12:15], s33 offset:48
168+
; CHECK-NEXT: scratch_store_b128 off, v[8:11], s33 offset:32
171169
; CHECK-NEXT: scratch_store_b128 off, v[4:7], s33 offset:16
172170
; CHECK-NEXT: scratch_store_b128 off, v[0:3], s33
173171
; CHECK-NEXT: v_mov_b32_e32 v0, 0x47
174-
; CHECK-NEXT: s_movk_i32 s32, 0x100
175172
; CHECK-NEXT: s_cmovk_i32 s32, 0x300
176173
; CHECK-NEXT: s_swappc_b64 s[30:31], s[0:1]
177174
; CHECK-NEXT: s_alloc_vgpr 0
178175
; CHECK-NEXT: s_endpgm
179176
%v = alloca <32 x i32>, align 128, addrspace(5)
180-
; use volatile store to avoid promotion of alloca to registers
181-
store volatile i32 0, ptr addrspace(5) %v
182177
store <32 x i32> %x, ptr addrspace(5) %v
183178
call amdgpu_gfx void @callee(i32 71)
184179
ret void

external/llvm-project/llvm/test/CodeGen/AMDGPU/machine-function-info-cwsr.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,8 +35,7 @@ define amdgpu_cs void @realign_stack(<32 x i32> %x) #0 {
3535
; CHECK-LABEL: {{^}}name: realign_stack
3636
; CHECK: scratchReservedForDynamicVGPRs: 512
3737
%v = alloca <32 x i32>, align 128, addrspace(5)
38-
; use volatile store to avoid promotion of alloca to registers
39-
store volatile <32 x i32> %x, ptr addrspace(5) %v
38+
store <32 x i32> %x, ptr addrspace(5) %v
4039
call amdgpu_gfx void @callee(i32 71)
4140
ret void
4241
}

0 commit comments

Comments
 (0)