Skip to content

Commit c77efd9

Browse files
committed
[AMDGPU] Fix .Lfunc_end label placement
Currently the label is placed after the kernel descriptor, even though the descriptor lives in the .rodata section, which is wrong. Placing the label directly after the function's code allows proper code size calculation in MC.
1 parent 251377c commit c77efd9

File tree

4 files changed

+40
-7
lines changed

4 files changed

+40
-7
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "Utils/SIDefinesUtils.h"
3434
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
3535
#include "llvm/BinaryFormat/ELF.h"
36+
#include "llvm/CodeGen/AsmPrinterHandler.h"
3637
#include "llvm/CodeGen/MachineFrameInfo.h"
3738
#include "llvm/CodeGen/MachineModuleInfo.h"
3839
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
@@ -90,6 +91,22 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmPrinter() {
9091
createAMDGPUAsmPrinterPass);
9192
}
9293

94+
namespace {
95+
// AsmPrinterHandler that forwards the end-of-function callback to
// AMDGPUAsmPrinter::endFunction. The begin-of-function and end-of-module
// callbacks are deliberately empty.
class AMDGPUAsmPrinterHandler : public AsmPrinterHandler {
96+
protected:
97+
// Printer that receives the forwarded endFunction call. Stored as a raw
// pointer and never deleted by this class.
AMDGPUAsmPrinter *Asm;
98+
99+
public:
100+
// A is the printer to forward to; endFunction dereferences it without a
// null check.
AMDGPUAsmPrinterHandler(AMDGPUAsmPrinter *A) : Asm(A) {}
101+
102+
// No work is done at function start.
void beginFunction(const MachineFunction *MF) override {}
103+
104+
// Delegates to AMDGPUAsmPrinter::endFunction for MF.
void endFunction(const MachineFunction *MF) override { Asm->endFunction(MF); }
105+
106+
// No work is done at module end.
void endModule() override {}
107+
};
108+
} // End anonymous namespace
109+
93110
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
94111
std::unique_ptr<MCStreamer> Streamer)
95112
: AsmPrinter(TM, std::move(Streamer)) {
@@ -209,13 +226,12 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
209226
HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
210227
}
211228

212-
void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
229+
void AMDGPUAsmPrinter::endFunction(const MachineFunction *MF) {
213230
const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
214231
if (!MFI.isEntryFunction())
215232
return;
216233

217-
if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
218-
return;
234+
assert(TM.getTargetTriple().getOS() == Triple::AMDHSA);
219235

220236
auto &Streamer = getTargetStreamer()->getStreamer();
221237
auto &Context = Streamer.getContext();
@@ -351,6 +367,8 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) {
351367
default:
352368
report_fatal_error("Unexpected code object version");
353369
}
370+
371+
addAsmPrinterHandler(std::make_unique<AMDGPUAsmPrinterHandler>(this));
354372
}
355373

356374
return AsmPrinter::doInitialization(M);

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
118118

119119
void emitFunctionBodyStart() override;
120120

121-
void emitFunctionBodyEnd() override;
121+
void endFunction(const MachineFunction *MF);
122122

123123
void emitImplicitDef(const MachineInstr *MI) const override;
124124

llvm/test/CodeGen/AMDGPU/hsa.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,16 +96,19 @@
9696
; PRE-GFX10: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
9797
; GFX10: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off
9898

99+
; HSA: s_endpgm
100+
; HSA-NEXT: .Lfunc_end0:
101+
; HSA-NEXT: .size simple, .Lfunc_end0-simple
102+
103+
; HSA: .section .rodata,"a",@progbits
104+
99105
; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
100106
; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 1
101107

102108
; PRE-GFX10-NOT: .amdhsa_wavefront_size32
103109
; GFX10-W32: .amdhsa_wavefront_size32 1
104110
; GFX10-W64: .amdhsa_wavefront_size32 0
105111

106-
; HSA: .Lfunc_end0:
107-
; HSA: .size simple, .Lfunc_end0-simple
108-
109112
define amdgpu_kernel void @simple(ptr addrspace(1) %out) #0 {
110113
entry:
111114
store i32 0, ptr addrspace(1) %out

llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ define amdgpu_kernel void @max_alignment_128() #0 {
1515
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:128
1616
; VI-NEXT: s_waitcnt vmcnt(0)
1717
; VI-NEXT: s_endpgm
18+
; VI-NEXT: .Lfunc_end0:
19+
; VI-NEXT: .size max_alignment_128, .Lfunc_end0-max_alignment_128
1820
; VI-NEXT: .section .rodata,"a"
1921
; VI-NEXT: .p2align 6
2022
; VI-NEXT: .amdhsa_kernel max_alignment_128
@@ -66,6 +68,8 @@ define amdgpu_kernel void @max_alignment_128() #0 {
6668
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:128
6769
; GFX9-NEXT: s_waitcnt vmcnt(0)
6870
; GFX9-NEXT: s_endpgm
71+
; GFX9-NEXT: .Lfunc_end0:
72+
; GFX9-NEXT: .size max_alignment_128, .Lfunc_end0-max_alignment_128
6973
; GFX9-NEXT: .section .rodata,"a"
7074
; GFX9-NEXT: .p2align 6
7175
; GFX9-NEXT: .amdhsa_kernel max_alignment_128
@@ -126,6 +130,8 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
126130
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
127131
; VI-NEXT: s_waitcnt vmcnt(0)
128132
; VI-NEXT: s_endpgm
133+
; VI-NEXT: .Lfunc_end1:
134+
; VI-NEXT: .size stackrealign_attr, .Lfunc_end1-stackrealign_attr
129135
; VI-NEXT: .section .rodata,"a"
130136
; VI-NEXT: .p2align 6
131137
; VI-NEXT: .amdhsa_kernel stackrealign_attr
@@ -177,6 +183,8 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
177183
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
178184
; GFX9-NEXT: s_waitcnt vmcnt(0)
179185
; GFX9-NEXT: s_endpgm
186+
; GFX9-NEXT: .Lfunc_end1:
187+
; GFX9-NEXT: .size stackrealign_attr, .Lfunc_end1-stackrealign_attr
180188
; GFX9-NEXT: .section .rodata,"a"
181189
; GFX9-NEXT: .p2align 6
182190
; GFX9-NEXT: .amdhsa_kernel stackrealign_attr
@@ -237,6 +245,8 @@ define amdgpu_kernel void @alignstack_attr() #2 {
237245
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
238246
; VI-NEXT: s_waitcnt vmcnt(0)
239247
; VI-NEXT: s_endpgm
248+
; VI-NEXT: .Lfunc_end2:
249+
; VI-NEXT: .size alignstack_attr, .Lfunc_end2-alignstack_attr
240250
; VI-NEXT: .section .rodata,"a"
241251
; VI-NEXT: .p2align 6
242252
; VI-NEXT: .amdhsa_kernel alignstack_attr
@@ -288,6 +298,8 @@ define amdgpu_kernel void @alignstack_attr() #2 {
288298
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
289299
; GFX9-NEXT: s_waitcnt vmcnt(0)
290300
; GFX9-NEXT: s_endpgm
301+
; GFX9-NEXT: .Lfunc_end2:
302+
; GFX9-NEXT: .size alignstack_attr, .Lfunc_end2-alignstack_attr
291303
; GFX9-NEXT: .section .rodata,"a"
292304
; GFX9-NEXT: .p2align 6
293305
; GFX9-NEXT: .amdhsa_kernel alignstack_attr

0 commit comments

Comments
 (0)