Skip to content

Commit 644ff03

Browse files
committed
[OMPT][Offload] Refactor async-info data passing
This patch moves the OMPT-specific data type into its own header. At the same time it changes the type of the pointer stores in the async info object to a void* to not leak the OMPT specific data type into the general interface. The type change to void* was not part of the original WIP patch in https://github.com/AMD-Lightning-Internal/llvm-project/pull/2282 but I thought I can fold it into this refactoring. If this is unwanted, I'm happy to revert that change.
1 parent a53433c commit 644ff03

File tree

6 files changed

+76
-42
lines changed

6 files changed

+76
-42
lines changed

offload/include/OpenMP/OMPT/Interface.h

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
// Only provide functionality if target OMPT support is enabled
1717
#ifdef OMPT_SUPPORT
1818
#include "Callback.h"
19+
#include "OmptEventInfoTy.h"
1920
#include "Shared/APITypes.h"
2021
#include "Shared/Debug.h"
2122
#include "omp-tools.h"
@@ -462,14 +463,6 @@ template <typename FunctionPairTy, typename... ArgsTy>
462463
InterfaceRAII(FunctionPairTy Callbacks, ArgsTy... Args)
463464
-> InterfaceRAII<FunctionPairTy, ArgsTy...>;
464465

465-
/// Holds info needed to fill asynchronous trace records
466-
struct OmptEventInfoTy {
467-
/// The granted number of teams at runtime
468-
uint64_t NumTeams;
469-
/// Pointer to the actual buffer storage location
470-
ompt_record_ompt_t *TraceRecord;
471-
};
472-
473466
/// Similar to the original InterfaceRAII this class is used for tracing and
474467
/// extends the original with async capabilities. That is: It takes an
475468
/// additional AsyncInfo reference as argument to populate the relevant fields.
@@ -488,13 +481,15 @@ class TracerInterfaceRAII {
488481
auto Record = begin();
489482
// Gets freed in interface.cpp, functions
490483
// targetKernel and targetData once launching target operations returns.
491-
if (!AI->OmptEventInfo)
492-
AI->OmptEventInfo = new OmptEventInfoTy();
493-
AI->OmptEventInfo->TraceRecord = Record;
494-
AI->OmptEventInfo->NumTeams = 0;
484+
if (!AI->ProfilerData)
485+
AI->ProfilerData = new OmptEventInfoTy();
486+
// TODO: Make sure this has the right type
487+
auto OEI = reinterpret_cast<OmptEventInfoTy *>(AI->ProfilerData);
488+
OEI->TraceRecord = Record;
489+
OEI->NumTeams = 0;
495490
} else {
496491
// Actively prevent further tracing of this event
497-
AI->OmptEventInfo = nullptr;
492+
AI->ProfilerData = nullptr;
498493
}
499494
}
500495

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
//===- OmptEventInfoTy.h - OMPT specific trace record data ------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Data structure used to communicate OMPT specific profiler data from the
10+
// high-level libomptarget into the vendor-specific plugins
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#ifndef OFFLOAD_INCLUDE_OPENMP_OMPT_OMPTEVENTINFOTY_H
15+
#define OFFLOAD_INCLUDE_OPENMP_OMPT_OMPTEVENTINFOTY_H
16+
17+
#include "Shared/Debug.h"
18+
19+
struct ompt_record_ompt_t;
20+
21+
namespace llvm {
22+
namespace omp {
23+
namespace target {
24+
namespace ompt {
25+
26+
/// Holds info needed to fill asynchronous trace records
27+
struct OmptEventInfoTy {
28+
/// The granted number of teams at runtime
29+
uint64_t NumTeams;
30+
/// Pointer to the actual buffer storage location
31+
ompt_record_ompt_t *TraceRecord;
32+
};
33+
34+
} // namespace ompt
35+
} // namespace target
36+
} // namespace omp
37+
} // namespace llvm
38+
39+
#endif // OFFLOAD_INCLUDE_OPENMP_OMPT_OMPTEVENTINFOTY_H

offload/include/Shared/APITypes.h

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,6 @@ struct __tgt_device_binary {
7171

7272
// clang-format on
7373

74-
// OMPT: Forward declare for function pointer type
75-
namespace llvm::omp::target::ompt {
76-
struct OmptEventInfoTy;
77-
} // namespace llvm::omp::target::ompt
78-
7974
/// This struct contains information exchanged between different asynchronous
8075
/// operations for device-dependent optimization and potential synchronization
8176
struct __tgt_async_info {
@@ -98,8 +93,11 @@ struct __tgt_async_info {
9893

9994
/// Use for sync interface. When false => synchronous execution
10095
bool ExecAsync = true;
101-
/// Maintain the actal data for the OMPT stuff
102-
llvm::omp::target::ompt::OmptEventInfoTy *OmptEventInfo = nullptr;
96+
/// Maintain the actal data for OMPT.
97+
/// TODO: Moving forward, this should become a void* to not expose OMPT data
98+
/// type in general types
99+
// llvm::omp::target::ompt::OmptEventInfoTy *OmptEventInfo = nullptr;
100+
void *ProfilerData = nullptr;
103101
};
104102

105103
/// This struct contains all of the arguments to a target kernel region launch.

offload/libomptarget/interface.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -201,8 +201,8 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
201201
}
202202

203203
#ifdef OMPT_SUPPORT
204-
if (__tgt_async_info *AI = AsyncInfo; AI->OmptEventInfo)
205-
delete AI->OmptEventInfo;
204+
if (__tgt_async_info *AI = AsyncInfo; AI->ProfilerData)
205+
delete AI->ProfilerData;
206206
#endif
207207

208208
handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
@@ -491,13 +491,13 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
491491
}
492492

493493
#ifdef OMPT_SUPPORT
494-
if (__tgt_async_info *AI = AsyncInfo; AI->OmptEventInfo)
495-
delete AI->OmptEventInfo;
494+
if (__tgt_async_info *AI = AsyncInfo; AI->ProfilerData)
495+
delete AI->ProfilerData;
496496

497497
for (TargetAsyncInfoTy *LocalTAI : TargetAsyncInfos) {
498498
AsyncInfoTy &AsyncInfo = *LocalTAI;
499-
if (__tgt_async_info *AI = AsyncInfo; AI->OmptEventInfo) {
500-
delete AI->OmptEventInfo;
499+
if (__tgt_async_info *AI = AsyncInfo; AI->ProfilerData) {
500+
delete AI->ProfilerData;
501501
}
502502
}
503503
#endif

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "ErrorReporting.h"
3030
#include "OpenMP/OMPT/Interface.h"
3131
#include "OpenMP/OMPT/OmptCommonDefs.h"
32+
#include "OpenMP/OMPT/OmptEventInfoTy.h"
3233
#include "Shared/APITypes.h"
3334
#include "Shared/Debug.h"
3435
#include "Shared/Environment.h"
@@ -183,14 +184,14 @@ static void printOmptEventInfoTy(ompt::OmptEventInfoTy &OmptEventInfo) {
183184
static std::unique_ptr<ompt::OmptEventInfoTy>
184185
getOrNullOmptEventInfo(AsyncInfoWrapperTy &AsyncInfoWrapper) {
185186
__tgt_async_info *AI = AsyncInfoWrapper;
186-
if (!AI || !AI->OmptEventInfo)
187+
if (!AI || !AI->ProfilerData)
187188
return nullptr;
188189

189-
// We need to copy the content of the OmptEventInfo object to persist it
190+
// We need to copy the content of the ProfilerData object to persist it
190191
// between multiple async operations.
191-
auto LocalOmptEventInfo =
192-
std::make_unique<ompt::OmptEventInfoTy>(*AI->OmptEventInfo);
193-
printOmptEventInfoTy(*AI->OmptEventInfo);
192+
auto LocalOmptEventInfo = std::make_unique<ompt::OmptEventInfoTy>(
193+
*reinterpret_cast<ompt::OmptEventInfoTy *>(AI->ProfilerData));
194+
// printOmptEventInfoTy(*AI->ProfilerData);
194195
printOmptEventInfoTy(*LocalOmptEventInfo);
195196
return LocalOmptEventInfo;
196197
}
@@ -4347,7 +4348,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
43474348
// Implementation sanity checks: either unified_shared_memory or auto
43484349
// zero-copy, not both
43494350
if (isUnifiedSharedMemory && isAutoZeroCopy)
4350-
return Plugin::error(ErrorCode::UNKNOWN,
4351+
return Plugin::error(ErrorCode::UNKNOWN,
43514352
"Internal runtime error: cannot be both "
43524353
"unified_shared_memory and auto zero-copy.");
43534354

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,7 @@ AsyncInfoWrapperTy::AsyncInfoWrapperTy(GenericDeviceTy &Device,
456456
__tgt_async_info *AsyncInfoPtr)
457457
: Device(Device),
458458
AsyncInfoPtr(AsyncInfoPtr ? AsyncInfoPtr : &LocalAsyncInfo) {
459-
LocalAsyncInfo.OmptEventInfo = nullptr;
459+
LocalAsyncInfo.ProfilerData = nullptr;
460460
}
461461

462462
void AsyncInfoWrapperTy::finalize(Error &Err) {
@@ -525,7 +525,7 @@ Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
525525
} else {
526526
// Check that the retrieved execution mode is valid.
527527
if (!GenericKernelTy::isValidExecutionMode(ExecModeGlobal.getValue()))
528-
return Plugin::error(ErrorCode::UNKNOWN,
528+
return Plugin::error(ErrorCode::UNKNOWN,
529529
"Invalid execution mode %d for '%s'",
530530
ExecModeGlobal.getValue(), getName());
531531
ExecutionMode = ExecModeGlobal.getValue();
@@ -613,22 +613,22 @@ GenericKernelTy::getKernelLaunchEnvironment(
613613
DPxPTR(&LocalKLE), DPxPTR(*AllocOrErr),
614614
sizeof(KernelLaunchEnvironmentTy));
615615

616-
// The OmptEventInfo at this point will have a callback for a kernel launch,
616+
// The ProfilerData at this point will have a callback for a kernel launch,
617617
// not a data-op. This is due to the "external" operation being a kernel
618618
// launch and the data submit here being an implementation detail. We
619-
// temporarily set the OmptEventInfo to nullptr, such that we disable the
619+
// temporarily set the ProfilerData to nullptr, such that we disable the
620620
// timing etc further down to not trigger assertions or report implementation
621621
// detail.
622622
__tgt_async_info *AI = AsyncInfoWrapper;
623-
if (AI && AI->OmptEventInfo) {
624-
auto LocalOEI = AI->OmptEventInfo;
625-
AI->OmptEventInfo = nullptr;
623+
if (AI && AI->ProfilerData) {
624+
auto LocalOEI = AI->ProfilerData;
625+
AI->ProfilerData = nullptr;
626626
auto Err = GenericDevice.dataSubmit(*AllocOrErr, &LocalKLE,
627627
sizeof(KernelLaunchEnvironmentTy),
628628
AsyncInfoWrapper);
629629
if (Err)
630630
return Err;
631-
AI->OmptEventInfo = LocalOEI;
631+
AI->ProfilerData = LocalOEI;
632632
return static_cast<KernelLaunchEnvironmentTy *>(*AllocOrErr);
633633
}
634634

@@ -783,10 +783,11 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
783783
OMPT_IF_TRACING_ENABLED(if (llvm::omp::target::ompt::isTracedDevice(
784784
getDeviceId(&GenericDevice))) {
785785
__tgt_async_info *AI = AsyncInfoWrapper;
786-
if (AI->OmptEventInfo != nullptr) {
786+
if (AI->ProfilerData != nullptr) {
787787
// Set number of granted teams for OMPT
788788
setOmptGrantedNumTeams(NumBlocks[0]);
789-
AI->OmptEventInfo->NumTeams = NumBlocks[0];
789+
reinterpret_cast<OmptEventInfoTy *>(AI->ProfilerData)->NumTeams =
790+
NumBlocks[0];
790791
}
791792
});
792793

0 commit comments

Comments
 (0)