@@ -188,9 +188,9 @@ struct AAICVTracker;
188
188
struct OMPInformationCache : public InformationCache {
189
189
OMPInformationCache (Module &M, AnalysisGetter &AG,
190
190
BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC,
191
- KernelSet &Kernels)
191
+ KernelSet &Kernels, bool OpenMPPostLink )
192
192
: InformationCache(M, AG, Allocator, CGSCC), OMPBuilder(M),
193
- Kernels (Kernels) {
193
+ Kernels (Kernels), OpenMPPostLink(OpenMPPostLink) {
194
194
195
195
OMPBuilder.initialize ();
196
196
initializeRuntimeFunctions (M);
@@ -448,6 +448,24 @@ struct OMPInformationCache : public InformationCache {
448
448
CI->setCallingConv (Fn->getCallingConv ());
449
449
}
450
450
451
+ // Helper function to determine if it's legal to create a call to the runtime
452
+ // functions.
453
+ bool runtimeFnsAvailable (ArrayRef<RuntimeFunction> Fns) {
454
+ // We can always emit calls if we haven't yet linked in the runtime.
455
+ if (!OpenMPPostLink)
456
+ return true ;
457
+
458
+ // Once the runtime has been already been linked in we cannot emit calls to
459
+ // any undefined functions.
460
+ for (RuntimeFunction Fn : Fns) {
461
+ RuntimeFunctionInfo &RFI = RFIs[Fn];
462
+
463
+ if (RFI.Declaration && RFI.Declaration ->isDeclaration ())
464
+ return false ;
465
+ }
466
+ return true ;
467
+ }
468
+
451
469
// / Helper to initialize all runtime function information for those defined
452
470
// / in OpenMPKinds.def.
453
471
void initializeRuntimeFunctions (Module &M) {
@@ -523,6 +541,9 @@ struct OMPInformationCache : public InformationCache {
523
541
524
542
// / Collection of known OpenMP runtime functions.
525
543
DenseSet<const Function *> RTLFunctions;
544
+
545
+ // / Indicates if we have already linked in the OpenMP device library.
546
+ bool OpenMPPostLink = false ;
526
547
};
527
548
528
549
template <typename Ty, bool InsertInvalidates = true >
@@ -1412,7 +1433,10 @@ struct OpenMPOpt {
1412
1433
Changed |= WasSplit;
1413
1434
return WasSplit;
1414
1435
};
1415
- RFI.foreachUse (SCC, SplitMemTransfers);
1436
+ if (OMPInfoCache.runtimeFnsAvailable (
1437
+ {OMPRTL___tgt_target_data_begin_mapper_issue,
1438
+ OMPRTL___tgt_target_data_begin_mapper_wait}))
1439
+ RFI.foreachUse (SCC, SplitMemTransfers);
1416
1440
1417
1441
return Changed;
1418
1442
}
@@ -3914,6 +3938,12 @@ struct AAKernelInfoFunction : AAKernelInfo {
3914
3938
bool changeToSPMDMode (Attributor &A, ChangeStatus &Changed) {
3915
3939
auto &OMPInfoCache = static_cast <OMPInformationCache &>(A.getInfoCache ());
3916
3940
3941
+ // We cannot change to SPMD mode if the runtime functions aren't available.
3942
+ if (!OMPInfoCache.runtimeFnsAvailable (
3943
+ {OMPRTL___kmpc_get_hardware_thread_id_in_block,
3944
+ OMPRTL___kmpc_barrier_simple_spmd}))
3945
+ return false ;
3946
+
3917
3947
if (!SPMDCompatibilityTracker.isAssumed ()) {
3918
3948
for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) {
3919
3949
if (!NonCompatibleI)
@@ -4021,6 +4051,13 @@ struct AAKernelInfoFunction : AAKernelInfo {
4021
4051
if (!ReachedKnownParallelRegions.isValidState ())
4022
4052
return ChangeStatus::UNCHANGED;
4023
4053
4054
+ auto &OMPInfoCache = static_cast <OMPInformationCache &>(A.getInfoCache ());
4055
+ if (!OMPInfoCache.runtimeFnsAvailable (
4056
+ {OMPRTL___kmpc_get_hardware_num_threads_in_block,
4057
+ OMPRTL___kmpc_get_warp_size, OMPRTL___kmpc_barrier_simple_generic,
4058
+ OMPRTL___kmpc_kernel_parallel, OMPRTL___kmpc_kernel_end_parallel}))
4059
+ return ChangeStatus::UNCHANGED;
4060
+
4024
4061
const int InitModeArgNo = 1 ;
4025
4062
const int InitUseStateMachineArgNo = 2 ;
4026
4063
@@ -4167,7 +4204,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
4167
4204
BranchInst::Create (IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB);
4168
4205
4169
4206
Module &M = *Kernel->getParent ();
4170
- auto &OMPInfoCache = static_cast <OMPInformationCache &>(A.getInfoCache ());
4171
4207
FunctionCallee BlockHwSizeFn =
4172
4208
OMPInfoCache.OMPBuilder .getOrCreateRuntimeFunction (
4173
4209
M, OMPRTL___kmpc_get_hardware_num_threads_in_block);
@@ -5343,7 +5379,10 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
5343
5379
BumpPtrAllocator Allocator;
5344
5380
CallGraphUpdater CGUpdater;
5345
5381
5346
- OMPInformationCache InfoCache (M, AG, Allocator, /* CGSCC*/ nullptr , Kernels);
5382
+ bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
5383
+ LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink;
5384
+ OMPInformationCache InfoCache (M, AG, Allocator, /* CGSCC*/ nullptr , Kernels,
5385
+ PostLink);
5347
5386
5348
5387
unsigned MaxFixpointIterations =
5349
5388
(isOpenMPDevice (M)) ? SetFixpointIterations : 32 ;
@@ -5417,9 +5456,11 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
5417
5456
CallGraphUpdater CGUpdater;
5418
5457
CGUpdater.initialize (CG, C, AM, UR);
5419
5458
5459
+ bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
5460
+ LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink;
5420
5461
SetVector<Function *> Functions (SCC.begin (), SCC.end ());
5421
5462
OMPInformationCache InfoCache (*(Functions.back ()->getParent ()), AG, Allocator,
5422
- /* CGSCC*/ &Functions, Kernels);
5463
+ /* CGSCC*/ &Functions, Kernels, PostLink );
5423
5464
5424
5465
unsigned MaxFixpointIterations =
5425
5466
(isOpenMPDevice (M)) ? SetFixpointIterations : 32 ;
0 commit comments