|
9 | 9 | //===----------------------------------------------------------------------===// |
10 | 10 |
|
11 | 11 | #include "llvm/SYCLLowerIR/LowerWGLocalMemory.h" |
| 12 | +#include "llvm/ADT/DenseSet.h" |
12 | 13 | #include "llvm/Demangle/Demangle.h" |
13 | 14 | #include "llvm/IR/Function.h" |
14 | 15 | #include "llvm/IR/IRBuilder.h" |
@@ -91,29 +92,35 @@ ModulePass *llvm::createSYCLLowerWGLocalMemoryLegacyPass() { |
91 | 92 | // inlined first before each __sycl_allocateLocalMemory call can be lowered to a |
92 | 93 | // unique global variable. They are inlined here so that this pass doesn't have
93 | 94 | // an implicit dependency on AlwaysInlinerPass.
| 95 | +// |
| 96 | +// syclcompat::local_mem, which represents a unique allocation, calls |
| 97 | +// group_local_memory_for_overwrite. So local_mem should be inlined as well. |
94 | 98 | static bool inlineGroupLocalMemoryFunc(Module &M) { // Added rows: inline every call to a "sycl-forceinline" function reached by walking up from SYCL_ALLOCLOCALMEM_CALL, erase those functions, and return true if any were erased.
95 | 99 | Function *ALMFunc = M.getFunction(SYCL_ALLOCLOCALMEM_CALL); // Looked up by name in the module; may be null.
96 | 100 | if (!ALMFunc || ALMFunc->use_empty()) // Nothing to do when the allocation function is absent or has no uses.
97 | 101 | return false;
98 | 102 |
|
99 | | - bool Changed = false;
100 | | - for (auto *U : ALMFunc->users()) {
101 | | - auto *Caller = cast<CallInst>(U)->getFunction();
102 | | - if (!Caller->hasFnAttribute("sycl_forceinline")) {
103 | | - // Already inlined.
104 | | - continue;
105 | | - }
106 | | - for (auto *U2 : make_early_inc_range(Caller->users())) {
107 | | - auto *CI = cast<CallInst>(U2);
108 | | - InlineFunctionInfo IFI;
109 | | - [[maybe_unused]] auto Result = InlineFunction(*CI, IFI);
110 | | - assert(Result.isSuccess() && "inlining failed");
| 103 | + SmallVector<Function *, 4> WorkList{ALMFunc}; // Seeded with the allocation function; marked callers are appended as they are discovered.
| 104 | + DenseSet<Function *> Visited; // Marked callers found so far; each is queued once and erased after the walk.
| 105 | + while (!WorkList.empty()) {
| 106 | + auto *F = WorkList.pop_back_val(); // Most recently queued function is processed first.
| 107 | + for (auto *U : make_early_inc_range(F->users())) { // Early-inc iteration, presumably because inlining below removes the current call site.
| 108 | + auto *CI = cast<CallInst>(U); // cast<> asserts if a user is not a CallInst.
| 109 | + auto *Caller = CI->getFunction(); // Function that contains this call.
| 110 | + if (Caller->hasFnAttribute("sycl-forceinline") && // NOTE(review): the removed row tests "sycl_forceinline" (underscore); confirm which spelling is emitted.
| 111 | + Visited.insert(Caller).second) // .second is true only on first insertion, so each caller is queued once.
| 112 | + WorkList.push_back(Caller);
| 113 | + if (F != ALMFunc) { // Calls to the allocation function itself are left in place; only calls to queued callers are inlined.
| 114 | + InlineFunctionInfo IFI;
| 115 | + [[maybe_unused]] auto Result = InlineFunction(*CI, IFI); // Result is only read by the assert below.
| 116 | + assert(Result.isSuccess() && "inlining failed");
| 117 | + }
111 | 118 | }
112 | | - Caller->eraseFromParent();
113 | | - Changed = true;
114 | 119 | }
| 120 | + for (auto *F : Visited) // Every queued caller is deleted once the walk has finished.
| 121 | + F->eraseFromParent();
115 | 122 |
|
116 | | - return Changed;
| 123 | + return !Visited.empty(); // True when at least one caller was queued (and therefore erased).
117 | 124 | }
118 | 125 |
|
119 | 126 | // TODO: It should be checked that __sycl_allocateLocalMemory (or its source |
|
0 commit comments