Skip to content

Commit 067a110

Browse files
authored
RegisterCoalescer: Do not introduce uses of empty register classes (#161809)
Check RegisterClassInfo to see whether any registers of the new class are actually available for use. Currently, AMDGPU overrides shouldCoalesce to avoid this situation. The target hook does not have access to the dynamic register class counts, but ideally the target hook would only be used for profitability concerns. The new test's output doesn't change with this patch, due to the AMDGPU shouldCoalesce override, but the function would be unallocatable if we dropped the override and switched to the default implementation. The existing limit-coalesce.mir already tests the behavior of this override, but the override is too conservative and isn't checking the case where the new class is unallocatable. Add this check so that the override can be relaxed.
1 parent be9e747 commit 067a110

File tree

3 files changed

+67
-0
lines changed

3 files changed

+67
-0
lines changed

llvm/lib/CodeGen/RegisterCoalescer.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2051,6 +2051,12 @@ bool RegisterCoalescer::joinCopy(
20512051
}
20522052

20532053
if (CP.getNewRC()) {
2054+
if (RegClassInfo.getNumAllocatableRegs(CP.getNewRC()) == 0) {
2055+
LLVM_DEBUG(dbgs() << "\tNo " << TRI->getRegClassName(CP.getNewRC())
2056+
<< "are available for allocation\n");
2057+
return false;
2058+
}
2059+
20542060
auto SrcRC = MRI->getRegClass(CP.getSrcReg());
20552061
auto DstRC = MRI->getRegClass(CP.getDstReg());
20562062
unsigned SrcIdx = CP.getSrcIdx();
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
3+
4+
; Make sure the coalescer doesn't introduce any uses of
5+
; vreg_1024. None are available to allocate with the register budget
6+
; of this function.
7+
8+
define void @no_introduce_vreg_1024() #0 {
9+
; CHECK-LABEL: no_introduce_vreg_1024:
10+
; CHECK: ; %bb.0:
11+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12+
; CHECK-NEXT: ;;#ASMSTART
13+
; CHECK-NEXT: ; def v[0:7]
14+
; CHECK-NEXT: ;;#ASMEND
15+
; CHECK-NEXT: v_mov_b32_e32 v9, v0
16+
; CHECK-NEXT: ;;#ASMSTART
17+
; CHECK-NEXT: ; use v[0:15]
18+
; CHECK-NEXT: ;;#ASMEND
19+
; CHECK-NEXT: s_setpc_b64 s[30:31]
20+
%tuple = call <8 x i32> asm sideeffect "; def $0","=v"()
21+
%sub0 = extractelement <8 x i32> %tuple, i32 0
22+
%insert = insertelement <16 x i32> poison, i32 %sub0, i32 9
23+
call void asm sideeffect "; use $0","v"(<16 x i32> %insert)
24+
ret void
25+
}
26+
27+
attributes #0 = { nounwind "amdgpu-waves-per-eu"="10,10" }
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=register-coalescer -o - %s | FileCheck %s
3+
4+
# The register budget for this function does not permit using 1024-bit
5+
# registers. The coalescer should not introduce a 1024-bit virtual
6+
# register which will fail to allocate.
7+
8+
--- |
9+
define void @no_introduce_vreg_1024() #0 {
10+
ret void
11+
}
12+
13+
attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
14+
...
15+
---
16+
name: no_introduce_vreg_1024
17+
tracksRegLiveness: true
18+
machineFunctionInfo:
19+
occupancy: 10
20+
body: |
21+
bb.0:
22+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
23+
24+
; CHECK-LABEL: name: no_introduce_vreg_1024
25+
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
26+
; CHECK-NEXT: {{ $}}
27+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
28+
; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub9:vreg_512 = COPY [[COPY]].sub0
29+
; CHECK-NEXT: SI_RETURN implicit [[COPY1]]
30+
%0:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
31+
undef %1.sub9:vreg_512 = COPY %0.sub0
32+
SI_RETURN implicit %1
33+
34+
...

0 commit comments

Comments
 (0)