Skip to content

Commit c0e53ac

Browse files
jhuber6tstellar
authored andcommitted
[OpenMP] Make OpenMPOpt aware of the OpenMP runtime's status
The `OpenMPOpt` pass contains optimizations that generate new calls into the OpenMP runtime. This causes problems if we are in a state where the runtime has already been linked statically. Generating these new calls will result in them never being resolved. We should indicate if we are in a "post-link" LTO phase and prevent OpenMPOpt from generating new runtime calls. Generally, it's not desirable for passes to maintain state about the context in which they're called. But this is the only reasonable solution to static linking when we have a pass that generates new runtime calls. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D142646 (cherry picked from commit 0bdde9d)
1 parent 9833a55 commit c0e53ac

File tree

6 files changed

+67
-8
lines changed

6 files changed

+67
-8
lines changed

llvm/include/llvm/Transforms/IPO/OpenMPOpt.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,25 @@ KernelSet getDeviceKernels(Module &M);
3737
/// OpenMP optimizations pass.
3838
class OpenMPOptPass : public PassInfoMixin<OpenMPOptPass> {
3939
public:
40+
OpenMPOptPass() : LTOPhase(ThinOrFullLTOPhase::None) {}
41+
OpenMPOptPass(ThinOrFullLTOPhase LTOPhase) : LTOPhase(LTOPhase) {}
42+
4043
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
44+
45+
private:
46+
const ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None;
4147
};
4248

4349
/// CGSCC variant of the OpenMP optimizations pass.
///
/// An instance may be constructed with the ThinLTO/full-LTO phase it is being
/// scheduled in. When no phase is supplied the pass records
/// ThinOrFullLTOPhase::None.
class OpenMPOptCGSCCPass : public PassInfoMixin<OpenMPOptCGSCCPass> {
  /// LTO phase this instance was created for; fixed at construction time.
  const ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None;

public:
  /// Construct the pass without LTO phase information (phase stays `None`
  /// through the in-class initializer).
  OpenMPOptCGSCCPass() = default;

  /// Construct the pass for the given \p LTOPhase.
  OpenMPOptCGSCCPass(ThinOrFullLTOPhase LTOPhase) : LTOPhase(LTOPhase) {}

  /// Run the pass over the SCC \p C of the lazy call graph \p CG, using the
  /// analyses in \p AM and reporting call graph updates through \p UR.
  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
                        LazyCallGraph &CG, CGSCCUpdateResult &UR);
};
4860

4961
} // end namespace llvm

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1604,7 +1604,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
16041604
}
16051605

16061606
// Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
1607-
MPM.addPass(OpenMPOptPass());
1607+
MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
16081608

16091609
// Remove unused virtual tables to improve the quality of code generated by
16101610
// whole-program devirtualization and bitset lowering.
@@ -1811,7 +1811,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
18111811
addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
18121812

18131813
// Run the OpenMPOpt CGSCC pass again late.
1814-
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(OpenMPOptCGSCCPass()));
1814+
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
1815+
OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink)));
18151816

18161817
invokePeepholeEPCallbacks(MainFPM, Level);
18171818
MainFPM.addPass(JumpThreadingPass());

llvm/lib/Passes/PassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ MODULE_PASS("always-inline", AlwaysInlinerPass())
4444
MODULE_PASS("attributor", AttributorPass())
4545
MODULE_PASS("annotation2metadata", Annotation2MetadataPass())
4646
MODULE_PASS("openmp-opt", OpenMPOptPass())
47+
MODULE_PASS("openmp-opt-postlink", OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink))
4748
MODULE_PASS("called-value-propagation", CalledValuePropagationPass())
4849
MODULE_PASS("canonicalize-aliases", CanonicalizeAliasesPass())
4950
MODULE_PASS("cg-profile", CGProfilePass())

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Lines changed: 47 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,9 @@ struct AAICVTracker;
188188
struct OMPInformationCache : public InformationCache {
189189
OMPInformationCache(Module &M, AnalysisGetter &AG,
190190
BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC,
191-
KernelSet &Kernels)
191+
KernelSet &Kernels, bool OpenMPPostLink)
192192
: InformationCache(M, AG, Allocator, CGSCC), OMPBuilder(M),
193-
Kernels(Kernels) {
193+
Kernels(Kernels), OpenMPPostLink(OpenMPPostLink) {
194194

195195
OMPBuilder.initialize();
196196
initializeRuntimeFunctions(M);
@@ -448,6 +448,24 @@ struct OMPInformationCache : public InformationCache {
448448
CI->setCallingConv(Fn->getCallingConv());
449449
}
450450

451+
// Helper function to determine if it's legal to create a call to the runtime
452+
// functions.
453+
bool runtimeFnsAvailable(ArrayRef<RuntimeFunction> Fns) {
454+
// We can always emit calls if we haven't yet linked in the runtime.
455+
if (!OpenMPPostLink)
456+
return true;
457+
458+
// Once the runtime has been already been linked in we cannot emit calls to
459+
// any undefined functions.
460+
for (RuntimeFunction Fn : Fns) {
461+
RuntimeFunctionInfo &RFI = RFIs[Fn];
462+
463+
if (RFI.Declaration && RFI.Declaration->isDeclaration())
464+
return false;
465+
}
466+
return true;
467+
}
468+
451469
/// Helper to initialize all runtime function information for those defined
452470
/// in OpenMPKinds.def.
453471
void initializeRuntimeFunctions(Module &M) {
@@ -523,6 +541,9 @@ struct OMPInformationCache : public InformationCache {
523541

524542
/// Collection of known OpenMP runtime functions.
525543
DenseSet<const Function *> RTLFunctions;
544+
545+
/// Indicates if we have already linked in the OpenMP device library.
546+
bool OpenMPPostLink = false;
526547
};
527548

528549
template <typename Ty, bool InsertInvalidates = true>
@@ -1412,7 +1433,10 @@ struct OpenMPOpt {
14121433
Changed |= WasSplit;
14131434
return WasSplit;
14141435
};
1415-
RFI.foreachUse(SCC, SplitMemTransfers);
1436+
if (OMPInfoCache.runtimeFnsAvailable(
1437+
{OMPRTL___tgt_target_data_begin_mapper_issue,
1438+
OMPRTL___tgt_target_data_begin_mapper_wait}))
1439+
RFI.foreachUse(SCC, SplitMemTransfers);
14161440

14171441
return Changed;
14181442
}
@@ -3914,6 +3938,12 @@ struct AAKernelInfoFunction : AAKernelInfo {
39143938
bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) {
39153939
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
39163940

3941+
// We cannot change to SPMD mode if the runtime functions aren't available.
3942+
if (!OMPInfoCache.runtimeFnsAvailable(
3943+
{OMPRTL___kmpc_get_hardware_thread_id_in_block,
3944+
OMPRTL___kmpc_barrier_simple_spmd}))
3945+
return false;
3946+
39173947
if (!SPMDCompatibilityTracker.isAssumed()) {
39183948
for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) {
39193949
if (!NonCompatibleI)
@@ -4021,6 +4051,13 @@ struct AAKernelInfoFunction : AAKernelInfo {
40214051
if (!ReachedKnownParallelRegions.isValidState())
40224052
return ChangeStatus::UNCHANGED;
40234053

4054+
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
4055+
if (!OMPInfoCache.runtimeFnsAvailable(
4056+
{OMPRTL___kmpc_get_hardware_num_threads_in_block,
4057+
OMPRTL___kmpc_get_warp_size, OMPRTL___kmpc_barrier_simple_generic,
4058+
OMPRTL___kmpc_kernel_parallel, OMPRTL___kmpc_kernel_end_parallel}))
4059+
return ChangeStatus::UNCHANGED;
4060+
40244061
const int InitModeArgNo = 1;
40254062
const int InitUseStateMachineArgNo = 2;
40264063

@@ -4167,7 +4204,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
41674204
BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB);
41684205

41694206
Module &M = *Kernel->getParent();
4170-
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
41714207
FunctionCallee BlockHwSizeFn =
41724208
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
41734209
M, OMPRTL___kmpc_get_hardware_num_threads_in_block);
@@ -5343,7 +5379,10 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
53435379
BumpPtrAllocator Allocator;
53445380
CallGraphUpdater CGUpdater;
53455381

5346-
OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ nullptr, Kernels);
5382+
bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
5383+
LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink;
5384+
OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ nullptr, Kernels,
5385+
PostLink);
53475386

53485387
unsigned MaxFixpointIterations =
53495388
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
@@ -5417,9 +5456,11 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
54175456
CallGraphUpdater CGUpdater;
54185457
CGUpdater.initialize(CG, C, AM, UR);
54195458

5459+
bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
5460+
LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink;
54205461
SetVector<Function *> Functions(SCC.begin(), SCC.end());
54215462
OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
5422-
/*CGSCC*/ &Functions, Kernels);
5463+
/*CGSCC*/ &Functions, Kernels, PostLink);
54235464

54245465
unsigned MaxFixpointIterations =
54255466
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;

llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=AMDGPU
33
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=NVPTX
44
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=AMDGPU
5+
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=AMDGPU
56
; RUN: opt --mtriple=nvptx64-- -openmp-opt-disable-state-machine-rewrite -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=NVPTX
7+
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=NVPTX
68

79
;; void p0(void);
810
;; void p1(void);

llvm/test/Transforms/OpenMP/spmdization.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=AMDGPU
33
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=NVPTX
44
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED
5+
; RUN: opt --mtriple=amdgcn-amd-amdhsa --data-layout=A5 -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=AMDGPU-DISABLED
56
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefix=NVPTX-DISABLED
7+
; RUN: opt --mtriple=nvptx64-- -S -passes=openmp-opt-postlink < %s | FileCheck %s --check-prefix=NVPTX-DISABLED
68

79
;; void unknown(void);
810
;; void spmd_amenable(void) __attribute__((assume("ompx_spmd_amenable")));

0 commit comments

Comments
 (0)