Skip to content

Commit 846238b

Browse files
committed
Rebase for new test + improve comment
1 parent 6291135 commit 846238b

File tree

2 files changed

+7
-4
lines changed

2 files changed

+7
-4
lines changed

llvm/lib/Target/AMDGPU/GCNSchedStrategy.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,9 @@ class PreRARematStage : public GCNSchedStage {
484484
}
485485

486486
/// Returns whether it is always beneficial to rematerialize this register.
487+
/// These are rematerializations that never move instructions into higher
488+
/// frequency regions and at least shorten live intervals, so they are
489+
/// always useful irrespective of RP targets.
487490
bool isAlwaysBeneficial() const {
488491
// When the using region is executed a single time, we know
489492
// rematerializing will be beneficial whatever the defining region's
@@ -555,7 +558,7 @@ class PreRARematStage : public GCNSchedStage {
555558
/// Per-region contribution weights to RP score depending on whether RP is
556559
/// guaranteed or only likely to be reduced in the region. Only their
557560
/// relative value w.r.t. one another matters.
558-
static constexpr int WeightRP = 10, WeightRPMaybe = 5;
561+
static constexpr int WeightRP = 2, WeightRPMaybe = 1;
559562

560563
/// Number of 32-bit registers this rematerialization covers.
561564
const unsigned NumRegs;

llvm/test/CodeGen/AMDGPU/uniform-alignbit.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@ define amdgpu_kernel void @uniform_build_vector(i64 %in, ptr addrspace(1) %out)
1111
; GCN-NEXT: s_mov_b32 s6, s5
1212
; GCN-NEXT: s_mov_b32 s7, s5
1313
; GCN-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
14-
; GCN-NEXT: s_waitcnt vmcnt(0)
15-
; GCN-NEXT: v_mov_b32_e32 v1, 0
1614
; GCN-NEXT: ; sched_barrier mask(0x00000000)
15+
; GCN-NEXT: s_waitcnt vmcnt(0)
1716
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
1817
; GCN-NEXT: v_cvt_f32_ubyte0_e32 v0, v0
18+
; GCN-NEXT: v_mov_b32_e32 v1, 0
1919
; GCN-NEXT: global_store_dword v1, v0, s[2:3]
2020
; GCN-NEXT: s_endpgm
2121
entry:
@@ -35,4 +35,4 @@ entry:
3535
declare void @llvm.amdgcn.sched.barrier(i32 immarg) #0
3636

3737
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
38-
declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32 immarg) #1
38+
declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32 immarg) #1

0 commit comments

Comments
 (0)