Skip to content

Commit 0e1a6d8

Browse files
committed
Early exit on maxWavesPerEU and rebase
1 parent 474f5d8 commit 0e1a6d8

File tree

3 files changed

+19
-19
lines changed

3 files changed

+19
-19
lines changed

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1294,6 +1294,10 @@ bool PreRARematStage::initGCNSchedStage() {
12941294

12951295
if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() <= 1)
12961296
return false;
1297+
if (DAG.MinOccupancy >= MFI.getMaxWavesPerEU() &&
1298+
!MF.getFunction().hasFnAttribute("amdgpu-num-sgpr") &&
1299+
!MF.getFunction().hasFnAttribute("amdgpu-num-vgpr"))
1300+
return false;
12971301

12981302
// Maps all MIs (except lone terminators, which are not part of any region) to
12991303
// their parent region. Non-lone terminators are considered part of the region

llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-memcpy.ll

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -419,12 +419,14 @@ define amdgpu_kernel void @memcpy_known(ptr addrspace(7) %src, ptr addrspace(7)
419419
; GISEL-GFX942-NEXT: s_load_dword s11, s[4:5], 0x34
420420
; GISEL-GFX942-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x44
421421
; GISEL-GFX942-NEXT: s_mov_b32 s16, 0
422-
; GISEL-GFX942-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
423-
; GISEL-GFX942-NEXT: s_mov_b32 s6, s15
424-
; GISEL-GFX942-NEXT: s_mov_b32 s2, s7
425-
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
426-
; GISEL-GFX942-NEXT: s_or_b64 s[6:7], s[6:7], s[2:3]
427422
; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, s16
423+
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
424+
; GISEL-GFX942-NEXT: s_mov_b32 s8, s1
425+
; GISEL-GFX942-NEXT: s_mov_b32 s9, s2
426+
; GISEL-GFX942-NEXT: s_mov_b32 s10, s3
427+
; GISEL-GFX942-NEXT: s_mov_b32 s4, s13
428+
; GISEL-GFX942-NEXT: s_mov_b32 s5, s14
429+
; GISEL-GFX942-NEXT: s_mov_b32 s6, s15
428430
; GISEL-GFX942-NEXT: v_mov_b32_e32 v1, 0x2000
429431
; GISEL-GFX942-NEXT: .LBB0_1: ; %load-store-loop
430432
; GISEL-GFX942-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -943,12 +945,14 @@ define amdgpu_kernel void @memcpy_known_medium(ptr addrspace(7) %src, ptr addrsp
943945
; GISEL-GFX942-NEXT: s_load_dword s11, s[4:5], 0x34
944946
; GISEL-GFX942-NEXT: s_load_dwordx4 s[12:15], s[4:5], 0x44
945947
; GISEL-GFX942-NEXT: s_mov_b32 s16, 0
946-
; GISEL-GFX942-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
947-
; GISEL-GFX942-NEXT: s_mov_b32 s6, s15
948-
; GISEL-GFX942-NEXT: s_mov_b32 s2, s7
949-
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
950-
; GISEL-GFX942-NEXT: s_or_b64 s[6:7], s[6:7], s[2:3]
951948
; GISEL-GFX942-NEXT: v_mov_b32_e32 v0, s16
949+
; GISEL-GFX942-NEXT: s_waitcnt lgkmcnt(0)
950+
; GISEL-GFX942-NEXT: s_mov_b32 s8, s1
951+
; GISEL-GFX942-NEXT: s_mov_b32 s9, s2
952+
; GISEL-GFX942-NEXT: s_mov_b32 s10, s3
953+
; GISEL-GFX942-NEXT: s_mov_b32 s4, s13
954+
; GISEL-GFX942-NEXT: s_mov_b32 s5, s14
955+
; GISEL-GFX942-NEXT: s_mov_b32 s6, s15
952956
; GISEL-GFX942-NEXT: v_mov_b32_e32 v1, 0x100
953957
; GISEL-GFX942-NEXT: .LBB1_1: ; %load-store-loop
954958
; GISEL-GFX942-NEXT: ; =>This Inner Loop Header: Depth=1

llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-wwm.ll

Lines changed: 1 addition & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -4,16 +4,8 @@
44
define amdgpu_gs i32 @main() {
55
; CHECK-LABEL: main:
66
; CHECK: ; %bb.0: ; %bb
7-
; CHECK-NEXT: s_bitcmp1_b32 0, 0
87
; CHECK-NEXT: s_mov_b32 s0, 0
9-
; CHECK-NEXT: s_cselect_b32 s1, -1, 0
10-
; CHECK-NEXT: s_or_saveexec_b32 s2, -1
11-
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1
12-
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
13-
; CHECK-NEXT: v_readfirstlane_b32 s1, v0
14-
; CHECK-NEXT: s_mov_b32 exec_lo, s2
15-
; CHECK-NEXT: s_or_b32 s0, s0, s1
16-
; CHECK-NEXT: s_wait_alu 0xfffe
8+
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
179
; CHECK-NEXT: s_bitcmp1_b32 s0, 0
1810
; CHECK-NEXT: s_cselect_b32 s0, -1, 0
1911
; CHECK-NEXT: s_xor_b32 s0, s0, -1

0 commit comments

Comments
 (0)