diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index b9769a1baf4d1..f7a7e48295ae5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -440,9 +440,9 @@ void initializeAMDGPUExternalAAWrapperPass(PassRegistry&); void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &); -ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass(); -void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &); -extern char &AMDGPUOpenCLEnqueuedBlockLoweringID; +ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringLegacyPass(); +void initializeAMDGPUOpenCLEnqueuedBlockLoweringLegacyPass(PassRegistry &); +extern char &AMDGPUOpenCLEnqueuedBlockLoweringLegacyID; void initializeGCNNSAReassignPass(PassRegistry &); extern char &GCNNSAReassignID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp index 4f5ca08b46c13..fbd15ad176e3b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp @@ -31,6 +31,7 @@ // //===----------------------------------------------------------------------===// +#include "AMDGPUOpenCLEnqueuedBlockLowering.h" #include "AMDGPU.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallString.h" @@ -48,11 +49,16 @@ using namespace llvm; namespace { /// Lower enqueued blocks. 
-class AMDGPUOpenCLEnqueuedBlockLowering : public ModulePass { +class AMDGPUOpenCLEnqueuedBlockLowering { +public: + bool run(Module &M); +}; + +class AMDGPUOpenCLEnqueuedBlockLoweringLegacy : public ModulePass { public: static char ID; - explicit AMDGPUOpenCLEnqueuedBlockLowering() : ModulePass(ID) {} + explicit AMDGPUOpenCLEnqueuedBlockLoweringLegacy() : ModulePass(ID) {} private: bool runOnModule(Module &M) override; @@ -60,19 +66,32 @@ class AMDGPUOpenCLEnqueuedBlockLowering : public ModulePass { } // end anonymous namespace -char AMDGPUOpenCLEnqueuedBlockLowering::ID = 0; +char AMDGPUOpenCLEnqueuedBlockLoweringLegacy::ID = 0; -char &llvm::AMDGPUOpenCLEnqueuedBlockLoweringID = - AMDGPUOpenCLEnqueuedBlockLowering::ID; +char &llvm::AMDGPUOpenCLEnqueuedBlockLoweringLegacyID = + AMDGPUOpenCLEnqueuedBlockLoweringLegacy::ID; -INITIALIZE_PASS(AMDGPUOpenCLEnqueuedBlockLowering, DEBUG_TYPE, +INITIALIZE_PASS(AMDGPUOpenCLEnqueuedBlockLoweringLegacy, DEBUG_TYPE, "Lower OpenCL enqueued blocks", false, false) -ModulePass* llvm::createAMDGPUOpenCLEnqueuedBlockLoweringPass() { - return new AMDGPUOpenCLEnqueuedBlockLowering(); +ModulePass *llvm::createAMDGPUOpenCLEnqueuedBlockLoweringLegacyPass() { + return new AMDGPUOpenCLEnqueuedBlockLoweringLegacy(); +} + +bool AMDGPUOpenCLEnqueuedBlockLoweringLegacy::runOnModule(Module &M) { + AMDGPUOpenCLEnqueuedBlockLowering Impl; + return Impl.run(M); +} + +PreservedAnalyses +AMDGPUOpenCLEnqueuedBlockLoweringPass::run(Module &M, ModuleAnalysisManager &) { + AMDGPUOpenCLEnqueuedBlockLowering Impl; + if (Impl.run(M)) + return PreservedAnalyses::none(); + return PreservedAnalyses::all(); } -bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) { +bool AMDGPUOpenCLEnqueuedBlockLowering::run(Module &M) { DenseSet<Function *> Callers; auto &C = M.getContext(); bool Changed = false; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.h new file mode 100644 index 
0000000000000..16ed7c18d8523 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.h @@ -0,0 +1,23 @@ +//===- AMDGPUOpenCLEnqueuedBlockLowering.h -----------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_OPENCLENQUEUEDBLOCKLOWERING_H +#define LLVM_LIB_TARGET_AMDGPU_OPENCLENQUEUEDBLOCKLOWERING_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +class AMDGPUOpenCLEnqueuedBlockLoweringPass + : public PassInfoMixin<AMDGPUOpenCLEnqueuedBlockLoweringPass> { +public: + AMDGPUOpenCLEnqueuedBlockLoweringPass() = default; + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); +}; +} // namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_OPENCLENQUEUEDBLOCKLOWERING_H diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 182e825a59a41..2c5cb05e6600a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -21,6 +21,7 @@ MODULE_PASS("amdgpu-lower-buffer-fat-pointers", AMDGPULowerBufferFatPointersPass(*this)) MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass()) MODULE_PASS("amdgpu-sw-lower-lds", AMDGPUSwLowerLDSPass(*this)) +MODULE_PASS("amdgpu-lower-enqueued-block", AMDGPUOpenCLEnqueuedBlockLoweringPass()) MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this)) MODULE_PASS("amdgpu-perf-hint", AMDGPUPerfHintAnalysisPass( diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 7256eec89008a..2cba3860cfce0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -22,6 +22,7 @@ #include "AMDGPUIGroupLP.h" #include 
"AMDGPUISelDAGToDAG.h" #include "AMDGPUMacroFusion.h" +#include "AMDGPUOpenCLEnqueuedBlockLowering.h" #include "AMDGPUPerfHintAnalysis.h" #include "AMDGPUSplitModule.h" #include "AMDGPUTargetObjectFile.h" @@ -499,7 +500,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeAMDGPULowerKernelArgumentsPass(*PR); initializeAMDGPUPromoteKernelArgumentsPass(*PR); initializeAMDGPULowerKernelAttributesPass(*PR); - initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR); + initializeAMDGPUOpenCLEnqueuedBlockLoweringLegacyPass(*PR); initializeAMDGPUPostLegalizerCombinerPass(*PR); initializeAMDGPUPreLegalizerCombinerPass(*PR); initializeAMDGPURegBankCombinerPass(*PR); @@ -1172,7 +1173,7 @@ void AMDGPUPassConfig::addIRPasses() { addPass(createR600OpenCLImageTypeLoweringPass()); // Replace OpenCL enqueued block function pointers with global variables. - addPass(createAMDGPUOpenCLEnqueuedBlockLoweringPass()); + addPass(createAMDGPUOpenCLEnqueuedBlockLoweringLegacyPass()); // Lower LDS accesses to global memory pass if address sanitizer is enabled. 
if (EnableSwLowerLDS) @@ -1939,7 +1940,7 @@ void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const { addPass(AMDGPUAlwaysInlinePass()); addPass(AlwaysInlinerPass()); - // TODO: Missing OpenCLEnqueuedBlockLowering + addPass(AMDGPUOpenCLEnqueuedBlockLoweringPass()); // Runs before PromoteAlloca so the latter can account for function uses if (EnableLowerModuleLDS) diff --git a/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll b/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll index 9391b50c04a5f..d7c8e47f98883 100644 --- a/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs ; RUN: opt -data-layout=A5 -amdgpu-lower-enqueued-block -S < %s | FileCheck %s +; RUN: opt -data-layout=A5 -mtriple=amdgcn -passes=amdgpu-lower-enqueued-block -S < %s | FileCheck %s %struct.ndrange_t = type { i32 } %opencl.queue_t = type opaque