Skip to content

Commit 2bc7030

Browse files
committed
[AMDGPU] Reflect the minimum occupancy from the amdgpu-waves-per-eu attribute in register pressure analysis
(used in machine LICM, the machine scheduler, and register allocation)
1 parent 5eb7126 commit 2bc7030

File tree

2 files changed

+32
-43
lines changed

2 files changed

+32
-43
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3758,6 +3758,11 @@ bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
37583758
unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
37593759
MachineFunction &MF) const {
37603760
unsigned MinOcc = ST.getOccupancyWithWorkGroupSizes(MF).first;
3761+
Function &F = MF.getFunction();
3762+
if (AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", true) !=
3763+
std::nullopt) {
3764+
MinOcc = ST.getWavesPerEU(F).first;
3765+
}
37613766
switch (RC->getID()) {
37623767
default:
37633768
return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);

llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll

Lines changed: 27 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -375,64 +375,48 @@ define void @v32_asm_def_use(float %v0, float %v1) #4 {
375375
; GFX908-NEXT: ;;#ASMSTART
376376
; GFX908-NEXT: ; def v[0:31] a[0:15]
377377
; GFX908-NEXT: ;;#ASMEND
378-
; GFX908-NEXT: v_accvgpr_read_b32 v35, a15
379-
; GFX908-NEXT: ;;#ASMSTART
380-
; GFX908-NEXT: ; def v32
381-
; GFX908-NEXT: ;;#ASMEND
382-
; GFX908-NEXT: s_nop 1
383-
; GFX908-NEXT: v_accvgpr_write_b32 a31, v35
378+
; GFX908-NEXT: v_accvgpr_read_b32 v32, a15
384379
; GFX908-NEXT: v_accvgpr_read_b32 v35, a14
385-
; GFX908-NEXT: s_nop 1
380+
; GFX908-NEXT: v_accvgpr_read_b32 v36, a13
381+
; GFX908-NEXT: v_accvgpr_write_b32 a31, v32
382+
; GFX908-NEXT: v_accvgpr_read_b32 v32, a12
386383
; GFX908-NEXT: v_accvgpr_write_b32 a30, v35
387-
; GFX908-NEXT: v_accvgpr_read_b32 v35, a13
388-
; GFX908-NEXT: s_nop 1
389-
; GFX908-NEXT: v_accvgpr_write_b32 a29, v35
390-
; GFX908-NEXT: v_accvgpr_read_b32 v35, a12
391-
; GFX908-NEXT: s_nop 1
392-
; GFX908-NEXT: v_accvgpr_write_b32 a28, v35
384+
; GFX908-NEXT: v_accvgpr_write_b32 a29, v36
385+
; GFX908-NEXT: v_accvgpr_write_b32 a28, v32
393386
; GFX908-NEXT: v_accvgpr_read_b32 v35, a11
394-
; GFX908-NEXT: s_nop 1
387+
; GFX908-NEXT: v_accvgpr_read_b32 v36, a10
388+
; GFX908-NEXT: v_accvgpr_read_b32 v32, a9
395389
; GFX908-NEXT: v_accvgpr_write_b32 a27, v35
396-
; GFX908-NEXT: v_accvgpr_read_b32 v35, a10
397-
; GFX908-NEXT: s_nop 1
398-
; GFX908-NEXT: v_accvgpr_write_b32 a26, v35
399-
; GFX908-NEXT: v_accvgpr_read_b32 v35, a9
400-
; GFX908-NEXT: s_nop 1
401-
; GFX908-NEXT: v_accvgpr_write_b32 a25, v35
390+
; GFX908-NEXT: v_accvgpr_write_b32 a26, v36
391+
; GFX908-NEXT: v_accvgpr_write_b32 a25, v32
402392
; GFX908-NEXT: v_accvgpr_read_b32 v35, a8
403-
; GFX908-NEXT: s_nop 1
393+
; GFX908-NEXT: v_accvgpr_read_b32 v36, a7
394+
; GFX908-NEXT: v_accvgpr_read_b32 v32, a6
404395
; GFX908-NEXT: v_accvgpr_write_b32 a24, v35
405-
; GFX908-NEXT: v_accvgpr_read_b32 v35, a7
406-
; GFX908-NEXT: s_nop 1
407-
; GFX908-NEXT: v_accvgpr_write_b32 a23, v35
408-
; GFX908-NEXT: v_accvgpr_read_b32 v35, a6
409-
; GFX908-NEXT: s_nop 1
410-
; GFX908-NEXT: v_accvgpr_write_b32 a22, v35
396+
; GFX908-NEXT: v_accvgpr_write_b32 a23, v36
397+
; GFX908-NEXT: v_accvgpr_write_b32 a22, v32
411398
; GFX908-NEXT: v_accvgpr_read_b32 v35, a5
412-
; GFX908-NEXT: s_nop 1
399+
; GFX908-NEXT: v_accvgpr_read_b32 v36, a4
400+
; GFX908-NEXT: v_accvgpr_read_b32 v32, a3
413401
; GFX908-NEXT: v_accvgpr_write_b32 a21, v35
414-
; GFX908-NEXT: v_accvgpr_read_b32 v35, a4
415-
; GFX908-NEXT: s_nop 1
416-
; GFX908-NEXT: v_accvgpr_write_b32 a20, v35
417-
; GFX908-NEXT: v_accvgpr_read_b32 v35, a3
418-
; GFX908-NEXT: s_nop 1
419-
; GFX908-NEXT: v_accvgpr_write_b32 a19, v35
402+
; GFX908-NEXT: v_accvgpr_write_b32 a20, v36
403+
; GFX908-NEXT: v_accvgpr_write_b32 a19, v32
420404
; GFX908-NEXT: v_accvgpr_read_b32 v35, a2
421-
; GFX908-NEXT: s_nop 1
405+
; GFX908-NEXT: v_accvgpr_read_b32 v36, a1
406+
; GFX908-NEXT: v_accvgpr_read_b32 v32, a0
422407
; GFX908-NEXT: v_accvgpr_write_b32 a18, v35
423-
; GFX908-NEXT: v_accvgpr_read_b32 v35, a1
424-
; GFX908-NEXT: s_nop 1
425-
; GFX908-NEXT: v_accvgpr_write_b32 a17, v35
426-
; GFX908-NEXT: v_accvgpr_read_b32 v35, a0
427-
; GFX908-NEXT: s_nop 1
428-
; GFX908-NEXT: v_accvgpr_write_b32 a16, v35
408+
; GFX908-NEXT: v_accvgpr_write_b32 a17, v36
409+
; GFX908-NEXT: v_accvgpr_write_b32 a16, v32
410+
; GFX908-NEXT: ;;#ASMSTART
411+
; GFX908-NEXT: ; def v32
412+
; GFX908-NEXT: ;;#ASMEND
429413
; GFX908-NEXT: ;;#ASMSTART
430414
; GFX908-NEXT: ; copy
431415
; GFX908-NEXT: ;;#ASMEND
432-
; GFX908-NEXT: v_accvgpr_read_b32 v35, a1
416+
; GFX908-NEXT: v_accvgpr_read_b32 v37, a1
433417
; GFX908-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v34, v33, a[16:31]
434418
; GFX908-NEXT: s_nop 0
435-
; GFX908-NEXT: v_accvgpr_write_b32 a32, v35
419+
; GFX908-NEXT: v_accvgpr_write_b32 a32, v37
436420
; GFX908-NEXT: ;;#ASMSTART
437421
; GFX908-NEXT: ; copy
438422
; GFX908-NEXT: ;;#ASMEND

0 commit comments

Comments
 (0)