Skip to content

Commit 04b0856

Browse files
committed
[AMDGPU] Avoid hitting AMDGPUAsmPrinter related asserts for local functions at O0.
Local functions are ignored by the (codegen) passes that run in CGSCC order; however, they may still be referenced. This patch allows such local functions to exist in AMDGPUResourceUsageAnalysis and avoids referencing the local function's MachineFunction during AMDGPUAsmPrinter's code emission by re-inserting the CGSCC pass manager through DummyCGSCCPass.
1 parent dc5bdcb commit 04b0856

File tree

8 files changed

+139
-110
lines changed

8 files changed

+139
-110
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1238,6 +1238,14 @@ bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
12381238
void AMDGPUAsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
12391239
AU.addRequired<AMDGPUResourceUsageAnalysis>();
12401240
AU.addPreserved<AMDGPUResourceUsageAnalysis>();
1241+
1242+
// The Dummy pass is necessary because AMDGPUResourceUsageAnalysis will pop
1243+
// the CGSCC pass manager off of the active pass managers stack. Adding the
1244+
// Dummy pass will re-insert the CGSCC pass manager into said stack again
1245+
// through CallGraphSCCPass::assignPassManager.
1246+
AU.addRequired<DummyCGSCCPass>();
1247+
AU.addPreserved<DummyCGSCCPass>();
1248+
12411249
AsmPrinter::getAnalysisUsage(AU);
12421250
}
12431251

llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -151,9 +151,16 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
151151

152152
SIFunctionResourceInfo &Info = CI.first->second;
153153
MachineFunction *MF = MMI.getMachineFunction(*F);
154-
assert(MF && "function must have been generated already");
155-
Info = analyzeResourceUsage(*MF, TM);
156-
HasIndirectCall |= Info.HasIndirectCall;
154+
// We can only analyze resource usage of functions for which there exists a
155+
// machinefunction equivalent. These may not exist as the (codegen) passes
156+
// prior to this one are run in CGSCC order which will bypass any local
157+
// functions that aren't called.
158+
assert((MF || !TPC->requiresCodeGenSCCOrder()) &&
159+
"function must have been generated already");
160+
if (MF) {
161+
Info = analyzeResourceUsage(*MF, TM);
162+
HasIndirectCall |= Info.HasIndirectCall;
163+
}
157164
}
158165

159166
if (HasIndirectCall)

llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,18 @@
33

44
declare i32 @llvm.amdgcn.workitem.id.x()
55

6+
define <2 x i64> @f1() #0 {
7+
; GFX11-LABEL: f1:
8+
; GFX11: ; %bb.0:
9+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10+
; GFX11-NEXT: v_mov_b32_e32 v0, 0
11+
; GFX11-NEXT: v_mov_b32_e32 v1, 0
12+
; GFX11-NEXT: v_mov_b32_e32 v2, 0
13+
; GFX11-NEXT: v_mov_b32_e32 v3, 0
14+
; GFX11-NEXT: s_setpc_b64 s[30:31]
15+
ret <2 x i64> zeroinitializer
16+
}
17+
618
define void @f0() {
719
; GFX11-LABEL: f0:
820
; GFX11: ; %bb.0: ; %bb
@@ -36,18 +48,6 @@ bb:
3648
ret void
3749
}
3850

39-
define <2 x i64> @f1() #0 {
40-
; GFX11-LABEL: f1:
41-
; GFX11: ; %bb.0:
42-
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43-
; GFX11-NEXT: v_mov_b32_e32 v0, 0
44-
; GFX11-NEXT: v_mov_b32_e32 v1, 0
45-
; GFX11-NEXT: v_mov_b32_e32 v2, 0
46-
; GFX11-NEXT: v_mov_b32_e32 v3, 0
47-
; GFX11-NEXT: s_setpc_b64 s[30:31]
48-
ret <2 x i64> zeroinitializer
49-
}
50-
5151
; FIXME: This generates "instid1(/* invalid instid value */)".
5252
define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg4, i1 %arg5, ptr %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10, i1 %arg11) {
5353
; GFX11-LABEL: f2:

llvm/test/CodeGen/AMDGPU/ipra.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -105,13 +105,6 @@ define void @test_funcx2() #0 {
105105
ret void
106106
}
107107

108-
; GCN-LABEL: {{^}}wombat:
109-
define weak amdgpu_kernel void @wombat(ptr %arg, ptr %arg2) {
110-
bb:
111-
call void @hoge() #0
112-
ret void
113-
}
114-
115108
; Make sure we save/restore the return address around the call.
116109
; Function Attrs: norecurse
117110
define internal void @hoge() #2 {
@@ -128,6 +121,13 @@ bb:
128121
ret void
129122
}
130123

124+
; GCN-LABEL: {{^}}wombat:
125+
define weak amdgpu_kernel void @wombat(ptr %arg, ptr %arg2) {
126+
bb:
127+
call void @hoge() #0
128+
ret void
129+
}
130+
131131
declare dso_local void @eggs()
132132

133133

llvm/test/CodeGen/AMDGPU/llc-pipeline.ll

Lines changed: 40 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -142,11 +142,14 @@
142142
; GCN-O0-NEXT: Machine Optimization Remark Emitter
143143
; GCN-O0-NEXT: Stack Frame Layout Analysis
144144
; GCN-O0-NEXT: Function register usage analysis
145-
; GCN-O0-NEXT: FunctionPass Manager
146-
; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis
147-
; GCN-O0-NEXT: Machine Optimization Remark Emitter
148-
; GCN-O0-NEXT: AMDGPU Assembly Printer
149-
; GCN-O0-NEXT: Free MachineFunction
145+
; GCN-O0-NEXT: CallGraph Construction
146+
; GCN-O0-NEXT: Call Graph SCC Pass Manager
147+
; GCN-O0-NEXT: DummyCGSCCPass
148+
; GCN-O0-NEXT: FunctionPass Manager
149+
; GCN-O0-NEXT: Lazy Machine Block Frequency Analysis
150+
; GCN-O0-NEXT: Machine Optimization Remark Emitter
151+
; GCN-O0-NEXT: AMDGPU Assembly Printer
152+
; GCN-O0-NEXT: Free MachineFunction
150153

151154
; GCN-O1:Target Library Information
152155
; GCN-O1-NEXT:Target Pass Configuration
@@ -409,11 +412,14 @@
409412
; GCN-O1-NEXT: Machine Optimization Remark Emitter
410413
; GCN-O1-NEXT: Stack Frame Layout Analysis
411414
; GCN-O1-NEXT: Function register usage analysis
412-
; GCN-O1-NEXT: FunctionPass Manager
413-
; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis
414-
; GCN-O1-NEXT: Machine Optimization Remark Emitter
415-
; GCN-O1-NEXT: AMDGPU Assembly Printer
416-
; GCN-O1-NEXT: Free MachineFunction
415+
; GCN-O1-NEXT: CallGraph Construction
416+
; GCN-O1-NEXT: Call Graph SCC Pass Manager
417+
; GCN-O1-NEXT: DummyCGSCCPass
418+
; GCN-O1-NEXT: FunctionPass Manager
419+
; GCN-O1-NEXT: Lazy Machine Block Frequency Analysis
420+
; GCN-O1-NEXT: Machine Optimization Remark Emitter
421+
; GCN-O1-NEXT: AMDGPU Assembly Printer
422+
; GCN-O1-NEXT: Free MachineFunction
417423

418424
; GCN-O1-OPTS:Target Library Information
419425
; GCN-O1-OPTS-NEXT:Target Pass Configuration
@@ -698,11 +704,14 @@
698704
; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter
699705
; GCN-O1-OPTS-NEXT: Stack Frame Layout Analysis
700706
; GCN-O1-OPTS-NEXT: Function register usage analysis
701-
; GCN-O1-OPTS-NEXT: FunctionPass Manager
702-
; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis
703-
; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter
704-
; GCN-O1-OPTS-NEXT: AMDGPU Assembly Printer
705-
; GCN-O1-OPTS-NEXT: Free MachineFunction
707+
; GCN-O1-OPTS-NEXT: CallGraph Construction
708+
; GCN-O1-OPTS-NEXT: Call Graph SCC Pass Manager
709+
; GCN-O1-OPTS-NEXT: DummyCGSCCPass
710+
; GCN-O1-OPTS-NEXT: FunctionPass Manager
711+
; GCN-O1-OPTS-NEXT: Lazy Machine Block Frequency Analysis
712+
; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter
713+
; GCN-O1-OPTS-NEXT: AMDGPU Assembly Printer
714+
; GCN-O1-OPTS-NEXT: Free MachineFunction
706715

707716
; GCN-O2:Target Library Information
708717
; GCN-O2-NEXT:Target Pass Configuration
@@ -999,11 +1008,14 @@
9991008
; GCN-O2-NEXT: Machine Optimization Remark Emitter
10001009
; GCN-O2-NEXT: Stack Frame Layout Analysis
10011010
; GCN-O2-NEXT: Function register usage analysis
1002-
; GCN-O2-NEXT: FunctionPass Manager
1003-
; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis
1004-
; GCN-O2-NEXT: Machine Optimization Remark Emitter
1005-
; GCN-O2-NEXT: AMDGPU Assembly Printer
1006-
; GCN-O2-NEXT: Free MachineFunction
1011+
; GCN-O2-NEXT: CallGraph Construction
1012+
; GCN-O2-NEXT: Call Graph SCC Pass Manager
1013+
; GCN-O2-NEXT: DummyCGSCCPass
1014+
; GCN-O2-NEXT: FunctionPass Manager
1015+
; GCN-O2-NEXT: Lazy Machine Block Frequency Analysis
1016+
; GCN-O2-NEXT: Machine Optimization Remark Emitter
1017+
; GCN-O2-NEXT: AMDGPU Assembly Printer
1018+
; GCN-O2-NEXT: Free MachineFunction
10071019

10081020
; GCN-O3:Target Library Information
10091021
; GCN-O3-NEXT:Target Pass Configuration
@@ -1312,11 +1324,14 @@
13121324
; GCN-O3-NEXT: Machine Optimization Remark Emitter
13131325
; GCN-O3-NEXT: Stack Frame Layout Analysis
13141326
; GCN-O3-NEXT: Function register usage analysis
1315-
; GCN-O3-NEXT: FunctionPass Manager
1316-
; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis
1317-
; GCN-O3-NEXT: Machine Optimization Remark Emitter
1318-
; GCN-O3-NEXT: AMDGPU Assembly Printer
1319-
; GCN-O3-NEXT: Free MachineFunction
1327+
; GCN-O3-NEXT: CallGraph Construction
1328+
; GCN-O3-NEXT: Call Graph SCC Pass Manager
1329+
; GCN-O3-NEXT: DummyCGSCCPass
1330+
; GCN-O3-NEXT: FunctionPass Manager
1331+
; GCN-O3-NEXT: Lazy Machine Block Frequency Analysis
1332+
; GCN-O3-NEXT: Machine Optimization Remark Emitter
1333+
; GCN-O3-NEXT: AMDGPU Assembly Printer
1334+
; GCN-O3-NEXT: Free MachineFunction
13201335

13211336
define void @empty() {
13221337
ret void

llvm/test/CodeGen/AMDGPU/lower-module-lds-offsets.ll

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,19 @@
99
@lds.size.1.align.1 = internal unnamed_addr addrspace(3) global [1 x i8] undef, align 1
1010
@lds.size.16.align.16 = internal unnamed_addr addrspace(3) global [16 x i8] undef, align 16
1111

12+
; GCN-LABEL: {{^}}f0:
13+
; GCN-DAG: v_mov_b32_e32 [[NULL:v[0-9]+]], 0
14+
; GCN-DAG: v_mov_b32_e32 [[TREE:v[0-9]+]], 3
15+
; GCN: ds_write_b8 [[NULL]], [[TREE]]
16+
define void @f0() {
17+
; OPT-LABEL: @f0(
18+
; OPT-NEXT: store i8 3, ptr addrspace(3) @llvm.amdgcn.module.lds, align 1
19+
; OPT-NEXT: ret void
20+
;
21+
store i8 3, ptr addrspace(3) @lds.size.1.align.1, align 1
22+
ret void
23+
}
24+
1225
; GCN-LABEL: {{^}}k0:
1326
; GCN-DAG: v_mov_b32_e32 [[NULL:v[0-9]+]], 0
1427
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
@@ -29,16 +42,3 @@ define amdgpu_kernel void @k0() {
2942
call void @f0()
3043
ret void
3144
}
32-
33-
; GCN-LABEL: {{^}}f0:
34-
; GCN-DAG: v_mov_b32_e32 [[NULL:v[0-9]+]], 0
35-
; GCN-DAG: v_mov_b32_e32 [[TREE:v[0-9]+]], 3
36-
; GCN: ds_write_b8 [[NULL]], [[TREE]]
37-
define void @f0() {
38-
; OPT-LABEL: @f0() {
39-
; OPT-NEXT: store i8 3, ptr addrspace(3) @llvm.amdgcn.module.lds, align 1
40-
; OPT-NEXT: ret void
41-
;
42-
store i8 3, ptr addrspace(3) @lds.size.1.align.1, align 1
43-
ret void
44-
}

llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll

Lines changed: 48 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,54 @@ store i32 0, ptr addrspace(3) @used_by_kernel
2424
}
2525
; CHECK: ; LDSByteSize: 4 bytes
2626

27+
define void @nonkernel() {
28+
; GFX9-LABEL: nonkernel:
29+
; GFX9: ; %bb.0:
30+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
32+
; GFX9-NEXT: v_mov_b32_e32 v1, v0
33+
; GFX9-NEXT: ds_write_b32 v0, v0 offset:8
34+
; GFX9-NEXT: ds_write_b64 v0, v[0:1]
35+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
36+
; GFX9-NEXT: s_setpc_b64 s[30:31]
37+
;
38+
; GFX10-LABEL: nonkernel:
39+
; GFX10: ; %bb.0:
40+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41+
; GFX10-NEXT: v_mov_b32_e32 v0, 0
42+
; GFX10-NEXT: v_mov_b32_e32 v1, v0
43+
; GFX10-NEXT: ds_write_b32 v0, v0 offset:8
44+
; GFX10-NEXT: ds_write_b64 v0, v[0:1]
45+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
46+
; GFX10-NEXT: s_setpc_b64 s[30:31]
47+
;
48+
; G_GFX9-LABEL: nonkernel:
49+
; G_GFX9: ; %bb.0:
50+
; G_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51+
; G_GFX9-NEXT: v_mov_b32_e32 v2, 0
52+
; G_GFX9-NEXT: v_mov_b32_e32 v3, 8
53+
; G_GFX9-NEXT: v_mov_b32_e32 v0, 0
54+
; G_GFX9-NEXT: v_mov_b32_e32 v1, 0
55+
; G_GFX9-NEXT: ds_write_b32 v3, v2
56+
; G_GFX9-NEXT: ds_write_b64 v2, v[0:1]
57+
; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
58+
; G_GFX9-NEXT: s_setpc_b64 s[30:31]
59+
;
60+
; G_GFX10-LABEL: nonkernel:
61+
; G_GFX10: ; %bb.0:
62+
; G_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63+
; G_GFX10-NEXT: v_mov_b32_e32 v2, 0
64+
; G_GFX10-NEXT: v_mov_b32_e32 v3, 8
65+
; G_GFX10-NEXT: v_mov_b32_e32 v0, 0
66+
; G_GFX10-NEXT: v_mov_b32_e32 v1, 0
67+
; G_GFX10-NEXT: ds_write_b32 v3, v2
68+
; G_GFX10-NEXT: ds_write_b64 v2, v[0:1]
69+
; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
70+
; G_GFX10-NEXT: s_setpc_b64 s[30:31]
71+
store i32 0, ptr addrspace(3) @used_by_both
72+
store double 0.0, ptr addrspace(3) @used_by_function
73+
ret void
74+
}
2775
; Needs to allocate both variables, store to used_by_both is at sizeof(double)
2876
define amdgpu_kernel void @withcall() {
2977
; GFX9-LABEL: withcall:
@@ -135,52 +183,3 @@ define amdgpu_kernel void @nocall_false_sharing() {
135183
}
136184
; CHECK: ; LDSByteSize: 4 bytes
137185

138-
139-
define void @nonkernel() {
140-
; GFX9-LABEL: nonkernel:
141-
; GFX9: ; %bb.0:
142-
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143-
; GFX9-NEXT: v_mov_b32_e32 v0, 0
144-
; GFX9-NEXT: v_mov_b32_e32 v1, v0
145-
; GFX9-NEXT: ds_write_b32 v0, v0 offset:8
146-
; GFX9-NEXT: ds_write_b64 v0, v[0:1]
147-
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
148-
; GFX9-NEXT: s_setpc_b64 s[30:31]
149-
;
150-
; GFX10-LABEL: nonkernel:
151-
; GFX10: ; %bb.0:
152-
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153-
; GFX10-NEXT: v_mov_b32_e32 v0, 0
154-
; GFX10-NEXT: v_mov_b32_e32 v1, v0
155-
; GFX10-NEXT: ds_write_b32 v0, v0 offset:8
156-
; GFX10-NEXT: ds_write_b64 v0, v[0:1]
157-
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
158-
; GFX10-NEXT: s_setpc_b64 s[30:31]
159-
;
160-
; G_GFX9-LABEL: nonkernel:
161-
; G_GFX9: ; %bb.0:
162-
; G_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
163-
; G_GFX9-NEXT: v_mov_b32_e32 v2, 0
164-
; G_GFX9-NEXT: v_mov_b32_e32 v3, 8
165-
; G_GFX9-NEXT: v_mov_b32_e32 v0, 0
166-
; G_GFX9-NEXT: v_mov_b32_e32 v1, 0
167-
; G_GFX9-NEXT: ds_write_b32 v3, v2
168-
; G_GFX9-NEXT: ds_write_b64 v2, v[0:1]
169-
; G_GFX9-NEXT: s_waitcnt lgkmcnt(0)
170-
; G_GFX9-NEXT: s_setpc_b64 s[30:31]
171-
;
172-
; G_GFX10-LABEL: nonkernel:
173-
; G_GFX10: ; %bb.0:
174-
; G_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
175-
; G_GFX10-NEXT: v_mov_b32_e32 v2, 0
176-
; G_GFX10-NEXT: v_mov_b32_e32 v3, 8
177-
; G_GFX10-NEXT: v_mov_b32_e32 v0, 0
178-
; G_GFX10-NEXT: v_mov_b32_e32 v1, 0
179-
; G_GFX10-NEXT: ds_write_b32 v3, v2
180-
; G_GFX10-NEXT: ds_write_b64 v2, v[0:1]
181-
; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
182-
; G_GFX10-NEXT: s_setpc_b64 s[30:31]
183-
store i32 0, ptr addrspace(3) @used_by_both
184-
store double 0.0, ptr addrspace(3) @used_by_function
185-
ret void
186-
}

llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
@gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
88

9-
; GCN-LABEL: unreachable:
9+
; GCN-NOT: unreachable:
1010
; Function info:
1111
; codeLenInByte = 4
1212
define internal fastcc void @unreachable() {

0 commit comments

Comments
 (0)