diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index a8d0bb746d2ef..63791ed9ee1aa 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -33,6 +33,7 @@ #include "Utils/SIDefinesUtils.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/CodeGen/AsmPrinterHandler.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" @@ -90,6 +91,22 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmPrinter() { createAMDGPUAsmPrinterPass); } +namespace { +class AMDGPUAsmPrinterHandler : public AsmPrinterHandler { +protected: + AMDGPUAsmPrinter *Asm; + +public: + AMDGPUAsmPrinterHandler(AMDGPUAsmPrinter *A) : Asm(A) {} + + void beginFunction(const MachineFunction *MF) override {} + + void endFunction(const MachineFunction *MF) override { Asm->endFunction(MF); } + + void endModule() override {} +}; +} // End anonymous namespace + AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) : AsmPrinter(TM, std::move(Streamer)) { @@ -209,13 +226,12 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() { HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo); } -void AMDGPUAsmPrinter::emitFunctionBodyEnd() { +void AMDGPUAsmPrinter::endFunction(const MachineFunction *MF) { const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>(); if (!MFI.isEntryFunction()) return; - if (TM.getTargetTriple().getOS() != Triple::AMDHSA) - return; + assert(TM.getTargetTriple().getOS() == Triple::AMDHSA); auto &Streamer = getTargetStreamer()->getStreamer(); auto &Context = Streamer.getContext(); @@ -351,6 +367,8 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) { default: report_fatal_error("Unexpected code object version"); } + + addAsmPrinterHandler(std::make_unique<AMDGPUAsmPrinterHandler>(this)); } return AsmPrinter::doInitialization(M); diff --git 
a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 2c959d7dbbd07..058970b2a6f44 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -118,7 +118,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter { void emitFunctionBodyStart() override; - void emitFunctionBodyEnd() override; + void endFunction(const MachineFunction *MF); void emitImplicitDef(const MachineInstr *MI) const override; diff --git a/llvm/test/CodeGen/AMDGPU/hsa.ll b/llvm/test/CodeGen/AMDGPU/hsa.ll index 37476203fbfad..6e5f16feb0773 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa.ll @@ -96,6 +96,12 @@ ; PRE-GFX10: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} ; GFX10: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off +; HSA: s_endpgm +; HSA-NEXT: .Lfunc_end0: +; HSA-NEXT: .size simple, .Lfunc_end0-simple + +; HSA: .section .rodata,"a",@progbits + ; HSA: .amdhsa_user_sgpr_private_segment_buffer 1 ; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 1 @@ -103,9 +109,6 @@ ; GFX10-W32: .amdhsa_wavefront_size32 1 ; GFX10-W64: .amdhsa_wavefront_size32 0 -; HSA: .Lfunc_end0: -; HSA: .size simple, .Lfunc_end0-simple - define amdgpu_kernel void @simple(ptr addrspace(1) %out) #0 { entry: store i32 0, ptr addrspace(1) %out diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll index 6ddf0986755f9..2d34169f9f34c 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll @@ -15,6 +15,8 @@ define amdgpu_kernel void @max_alignment_128() #0 { ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:128 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_endpgm +; VI-NEXT: .Lfunc_end0: +; VI-NEXT: .size max_alignment_128, .Lfunc_end0-max_alignment_128 ; VI-NEXT: .section .rodata,"a" ; VI-NEXT: .p2align 6 ; VI-NEXT: .amdhsa_kernel max_alignment_128 @@ -66,6 +68,8 @@ define 
amdgpu_kernel void @max_alignment_128() #0 { ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:128 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm +; GFX9-NEXT: .Lfunc_end0: +; GFX9-NEXT: .size max_alignment_128, .Lfunc_end0-max_alignment_128 ; GFX9-NEXT: .section .rodata,"a" ; GFX9-NEXT: .p2align 6 ; GFX9-NEXT: .amdhsa_kernel max_alignment_128 @@ -126,6 +130,8 @@ define amdgpu_kernel void @stackrealign_attr() #1 { ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_endpgm +; VI-NEXT: .Lfunc_end1: +; VI-NEXT: .size stackrealign_attr, .Lfunc_end1-stackrealign_attr ; VI-NEXT: .section .rodata,"a" ; VI-NEXT: .p2align 6 ; VI-NEXT: .amdhsa_kernel stackrealign_attr @@ -177,6 +183,8 @@ define amdgpu_kernel void @stackrealign_attr() #1 { ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm +; GFX9-NEXT: .Lfunc_end1: +; GFX9-NEXT: .size stackrealign_attr, .Lfunc_end1-stackrealign_attr ; GFX9-NEXT: .section .rodata,"a" ; GFX9-NEXT: .p2align 6 ; GFX9-NEXT: .amdhsa_kernel stackrealign_attr @@ -237,6 +245,8 @@ define amdgpu_kernel void @alignstack_attr() #2 { ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_endpgm +; VI-NEXT: .Lfunc_end2: +; VI-NEXT: .size alignstack_attr, .Lfunc_end2-alignstack_attr ; VI-NEXT: .section .rodata,"a" ; VI-NEXT: .p2align 6 ; VI-NEXT: .amdhsa_kernel alignstack_attr @@ -288,6 +298,8 @@ define amdgpu_kernel void @alignstack_attr() #2 { ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm +; GFX9-NEXT: .Lfunc_end2: +; GFX9-NEXT: .size alignstack_attr, .Lfunc_end2-alignstack_attr ; GFX9-NEXT: .section .rodata,"a" ; GFX9-NEXT: .p2align 6 ; GFX9-NEXT: .amdhsa_kernel alignstack_attr