Skip to content

Commit cf668cc

Browse files
committed
[AMDGPU] Fix .Lfunc_end label placement
Currently it is placed after the kernel descriptor, even though that section is .rodata, which is wrong. Fixing the placement allows proper code size calculation in MC.
1 parent 93a1184 commit cf668cc

File tree

4 files changed

+42
-7
lines changed

4 files changed

+42
-7
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "Utils/SIDefinesUtils.h"
3434
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
3535
#include "llvm/BinaryFormat/ELF.h"
36+
#include "llvm/CodeGen/AsmPrinterHandler.h"
3637
#include "llvm/CodeGen/MachineFrameInfo.h"
3738
#include "llvm/CodeGen/MachineModuleInfo.h"
3839
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
@@ -90,6 +91,24 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmPrinter() {
9091
createAMDGPUAsmPrinterPass);
9192
}
9293

94+
namespace {
95+
// AsmPrinterHandler that forwards the end-of-function callback to
// AMDGPUAsmPrinter::endFunction. The begin-of-function and end-of-module
// callbacks are intentionally empty.
class AMDGPUAsmPrinterHandler : public AsmPrinterHandler {
96+
protected:
97+
// Printer that receives the forwarded endFunction call.
AMDGPUAsmPrinter *Asm;
98+
99+
public:
100+
// Stores the printer pointer; the handler never checks it for null.
AMDGPUAsmPrinterHandler(AMDGPUAsmPrinter *A) : Asm(A) {}
101+
102+
// No per-function setup is performed by this handler.
virtual void beginFunction(const MachineFunction *MF) override {}
103+
104+
// Delegates to the printer's endFunction for the given function.
virtual void endFunction(const MachineFunction *MF) override {
105+
Asm->endFunction(MF);
106+
}
107+
108+
// No module-level finalization is performed by this handler.
virtual void endModule() override {}
109+
};
110+
} // End anonymous namespace
111+
93112
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
94113
std::unique_ptr<MCStreamer> Streamer)
95114
: AsmPrinter(TM, std::move(Streamer)) {
@@ -209,13 +228,12 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
209228
HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
210229
}
211230

212-
void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
231+
void AMDGPUAsmPrinter::endFunction(const MachineFunction *MF) {
213232
const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
214233
if (!MFI.isEntryFunction())
215234
return;
216235

217-
if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
218-
return;
236+
assert(TM.getTargetTriple().getOS() == Triple::AMDHSA);
219237

220238
auto &Streamer = getTargetStreamer()->getStreamer();
221239
auto &Context = Streamer.getContext();
@@ -351,6 +369,8 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) {
351369
default:
352370
report_fatal_error("Unexpected code object version");
353371
}
372+
373+
addAsmPrinterHandler(std::make_unique<AMDGPUAsmPrinterHandler>(this));
354374
}
355375

356376
return AsmPrinter::doInitialization(M);

llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
120120

121121
void emitFunctionBodyStart() override;
122122

123-
void emitFunctionBodyEnd() override;
123+
void endFunction(const MachineFunction *MF);
124124

125125
void emitImplicitDef(const MachineInstr *MI) const override;
126126

llvm/test/CodeGen/AMDGPU/hsa.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,16 +96,19 @@
9696
; PRE-GFX10: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
9797
; GFX10: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off
9898

99+
; HSA: s_endpgm
100+
; HSA-NEXT: .Lfunc_end0:
101+
; HSA-NEXT: .size simple, .Lfunc_end0-simple
102+
103+
; HSA: .section .rodata,"a",@progbits
104+
99105
; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
100106
; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 1
101107

102108
; PRE-GFX10-NOT: .amdhsa_wavefront_size32
103109
; GFX10-W32: .amdhsa_wavefront_size32 1
104110
; GFX10-W64: .amdhsa_wavefront_size32 0
105111

106-
; HSA: .Lfunc_end0:
107-
; HSA: .size simple, .Lfunc_end0-simple
108-
109112
define amdgpu_kernel void @simple(ptr addrspace(1) %out) #0 {
110113
entry:
111114
store i32 0, ptr addrspace(1) %out

llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ define amdgpu_kernel void @max_alignment_128() #0 {
1515
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:128
1616
; VI-NEXT: s_waitcnt vmcnt(0)
1717
; VI-NEXT: s_endpgm
18+
; VI-NEXT: .Lfunc_end0:
19+
; VI-NEXT: .size max_alignment_128, .Lfunc_end0-max_alignment_128
1820
; VI-NEXT: .section .rodata,"a"
1921
; VI-NEXT: .p2align 6
2022
; VI-NEXT: .amdhsa_kernel max_alignment_128
@@ -66,6 +68,8 @@ define amdgpu_kernel void @max_alignment_128() #0 {
6668
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:128
6769
; GFX9-NEXT: s_waitcnt vmcnt(0)
6870
; GFX9-NEXT: s_endpgm
71+
; GFX9-NEXT: .Lfunc_end0:
72+
; GFX9-NEXT: .size max_alignment_128, .Lfunc_end0-max_alignment_128
6973
; GFX9-NEXT: .section .rodata,"a"
7074
; GFX9-NEXT: .p2align 6
7175
; GFX9-NEXT: .amdhsa_kernel max_alignment_128
@@ -126,6 +130,8 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
126130
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
127131
; VI-NEXT: s_waitcnt vmcnt(0)
128132
; VI-NEXT: s_endpgm
133+
; VI-NEXT: .Lfunc_end1:
134+
; VI-NEXT: .size stackrealign_attr, .Lfunc_end1-stackrealign_attr
129135
; VI-NEXT: .section .rodata,"a"
130136
; VI-NEXT: .p2align 6
131137
; VI-NEXT: .amdhsa_kernel stackrealign_attr
@@ -177,6 +183,8 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
177183
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
178184
; GFX9-NEXT: s_waitcnt vmcnt(0)
179185
; GFX9-NEXT: s_endpgm
186+
; GFX9-NEXT: .Lfunc_end1:
187+
; GFX9-NEXT: .size stackrealign_attr, .Lfunc_end1-stackrealign_attr
180188
; GFX9-NEXT: .section .rodata,"a"
181189
; GFX9-NEXT: .p2align 6
182190
; GFX9-NEXT: .amdhsa_kernel stackrealign_attr
@@ -237,6 +245,8 @@ define amdgpu_kernel void @alignstack_attr() #2 {
237245
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
238246
; VI-NEXT: s_waitcnt vmcnt(0)
239247
; VI-NEXT: s_endpgm
248+
; VI-NEXT: .Lfunc_end2:
249+
; VI-NEXT: .size alignstack_attr, .Lfunc_end2-alignstack_attr
240250
; VI-NEXT: .section .rodata,"a"
241251
; VI-NEXT: .p2align 6
242252
; VI-NEXT: .amdhsa_kernel alignstack_attr
@@ -288,6 +298,8 @@ define amdgpu_kernel void @alignstack_attr() #2 {
288298
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
289299
; GFX9-NEXT: s_waitcnt vmcnt(0)
290300
; GFX9-NEXT: s_endpgm
301+
; GFX9-NEXT: .Lfunc_end2:
302+
; GFX9-NEXT: .size alignstack_attr, .Lfunc_end2-alignstack_attr
291303
; GFX9-NEXT: .section .rodata,"a"
292304
; GFX9-NEXT: .p2align 6
293305
; GFX9-NEXT: .amdhsa_kernel alignstack_attr

0 commit comments

Comments
 (0)