Skip to content

Commit 51fb1b1

Browse files
authored
[NFC][OMPT] Make data type non-OMPT specific (llvm#3768)
2 parents 73ad552 + 48c2620 commit 51fb1b1

File tree

2 files changed

+56
-44
lines changed

2 files changed

+56
-44
lines changed

offload/libomptarget/interface.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -201,8 +201,11 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
201201
}
202202

203203
#ifdef OMPT_SUPPORT
204-
if (__tgt_async_info *AI = AsyncInfo; AI->ProfilerData)
205-
delete AI->ProfilerData;
204+
if (__tgt_async_info *AI = AsyncInfo; AI->ProfilerData) {
205+
auto OmptData = reinterpret_cast<OmptEventInfoTy*>(AI->ProfilerData);
206+
// These deletes are going to go into the OmptProfiler
207+
delete OmptData;
208+
}
206209
#endif
207210

208211
handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
@@ -491,13 +494,18 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
491494
}
492495

493496
#ifdef OMPT_SUPPORT
494-
if (__tgt_async_info *AI = AsyncInfo; AI->ProfilerData)
495-
delete AI->ProfilerData;
497+
if (__tgt_async_info *AI = AsyncInfo; AI->ProfilerData) {
498+
auto OmptData = reinterpret_cast<OmptEventInfoTy*>(AI->ProfilerData);
499+
// These deletes are going to go into the OmptProfiler
500+
delete OmptData;
501+
}
496502

497503
for (TargetAsyncInfoTy *LocalTAI : TargetAsyncInfos) {
498504
AsyncInfoTy &AsyncInfo = *LocalTAI;
499505
if (__tgt_async_info *AI = AsyncInfo; AI->ProfilerData) {
500-
delete AI->ProfilerData;
506+
auto OmptData = reinterpret_cast<OmptEventInfoTy*>(AI->ProfilerData);
507+
// These deletes are going to go into the OmptProfiler
508+
delete OmptData;
501509
}
502510
}
503511
#endif

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 43 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -142,35 +142,41 @@ namespace plugin {
142142

143143
struct AMDGPUSignalTy;
144144
/// Used to transport information to OMPT timing functions.
145-
struct OmptKernelTimingArgsAsyncTy {
145+
struct ProfilingInfoTy {
146146
GenericDeviceTy *Device;
147147
AMDGPUSignalTy *Signal;
148148
double TicksToTime;
149-
std::unique_ptr<ompt::OmptEventInfoTy> OmptEventInfo;
149+
// FIXME: OMPT specific.
150+
std::unique_ptr<ompt::OmptEventInfoTy> ProfilerSpecificData;
150151
};
151152

152-
/// Get OmptKernelTimingArgsAsyncTy from the void * used in the action
153+
/// Get ProfilingInfoTy from the void * used in the action
153154
/// functions.
154-
static OmptKernelTimingArgsAsyncTy *getOmptTimingsArgs(void *Data);
155+
static ProfilingInfoTy *getProfilingInfo(void *Data);
155156

156157
/// Returns the pair of <start, end> time for a kernel
157158
static std::pair<uint64_t, uint64_t>
158-
getKernelStartAndEndTime(const OmptKernelTimingArgsAsyncTy *Args);
159+
getKernelStartAndEndTime(const ProfilingInfoTy *Args);
159160

160161
/// Returns the pair of <start, end> time for a data transfer
161162
static std::pair<uint64_t, uint64_t>
162-
getCopyStartAndEndTime(const OmptKernelTimingArgsAsyncTy *Args);
163+
getCopyStartAndEndTime(const ProfilingInfoTy *Args);
163164

164165
/// Obtain the timing info and call the RegionInterface callback for the
165166
/// asynchronous trace records.
166167
static Error timeDataTransferInNsAsync(void *Data) {
167-
auto Args = getOmptTimingsArgs(Data);
168+
auto Args = getProfilingInfo(Data);
168169

169170
auto [Start, End] = getCopyStartAndEndTime(Args);
170171

171-
auto OmptEventInfo = *Args->OmptEventInfo.get();
172+
/// XXX For now this must not happen as this is only called in OMPT code path.
173+
assert(Args->ProfilerSpecificData &&
174+
"ProfilerSpecificData was null when profiler enabled");
175+
176+
// XXX: get() only borrows the pointer; the unique_ptr in Args keeps ownership, so nothing is freed when this scope ends.
177+
auto OmptEventInfo = Args->ProfilerSpecificData.get();
172178
llvm::omp::target::ompt::RegionInterface.stopTargetDataMovementTraceAsync(
173-
OmptEventInfo.TraceRecord, Start, End);
179+
OmptEventInfo->TraceRecord, Start, End);
174180

175181
return Plugin::success();
176182
}
@@ -192,11 +198,12 @@ getOrNullOmptEventInfo(AsyncInfoWrapperTy &AsyncInfoWrapper) {
192198

193199
// We need to copy the content of the ProfilerData object to persist it
194200
// between multiple async operations.
195-
auto LocalOmptEventInfo = std::make_unique<ompt::OmptEventInfoTy>(
196-
*reinterpret_cast<ompt::OmptEventInfoTy *>(AI->ProfilerData));
197-
// printOmptEventInfoTy(*AI->ProfilerData);
201+
// TODO: This is OMPT specific right now
202+
auto LocalOmptEventInfo =
203+
reinterpret_cast<ompt::OmptEventInfoTy *>(AI->ProfilerData);
198204
printOmptEventInfoTy(*LocalOmptEventInfo);
199-
return LocalOmptEventInfo;
205+
206+
return std::make_unique<ompt::OmptEventInfoTy>(*LocalOmptEventInfo);
200207
}
201208

202209
} // namespace plugin
@@ -263,6 +270,8 @@ namespace llvm::omp::target::ompt {
263270
struct OmptEventInfoTy {};
264271
} // namespace llvm::omp::target::ompt
265272
namespace llvm::omp::target::plugin {
273+
274+
/// When OMPT is not enabled, return nullptr to effectively disable profiling.
266275
static std::unique_ptr<ompt::OmptEventInfoTy>
267276
getOrNullOmptEventInfo(AsyncInfoWrapperTy &AsyncInfoWrapper) {
268277
return nullptr;
@@ -1762,7 +1771,7 @@ struct AMDGPUStreamTy {
17621771
#ifdef OMPT_SUPPORT
17631772
/// Space for the OMPT action's arguments. A pointer to these arguments is
17641773
/// passed to the action function.
1765-
OmptKernelTimingArgsAsyncTy OmptKernelTimingArgsAsync;
1774+
ProfilingInfoTy OmptKernelTimingArgsAsync;
17661775
#endif
17671776

17681777
/// Create an empty slot.
@@ -1813,19 +1822,18 @@ struct AMDGPUStreamTy {
18131822
GenericDeviceTy *Device, AMDGPUSignalTy *OutputSignal,
18141823
double TicksToTime, std::unique_ptr<ompt::OmptEventInfoTy> OMPTData) {
18151824
OmptActionFunction = timeKernelInNsAsync;
1816-
OmptKernelTimingArgsAsync = OmptKernelTimingArgsAsyncTy{
1825+
OmptKernelTimingArgsAsync = ProfilingInfoTy{
18171826
Device, OutputSignal, TicksToTime, std::move(OMPTData)};
18181827
return Plugin::success();
18191828
}
18201829

18211830
/// Schedule OMPT data transfer timing on the slot
18221831
Error schedOmptAsyncD2HTransferTiming(
18231832
GenericDeviceTy *Device, AMDGPUSignalTy *OutputSignal,
1824-
double TicksToTime,
1825-
std::unique_ptr<ompt::OmptEventInfoTy> OmptInfoData) {
1833+
double TicksToTime, std::unique_ptr<ompt::OmptEventInfoTy> OMPTData) {
18261834
OmptActionFunction = timeDataTransferInNsAsync;
1827-
OmptKernelTimingArgsAsync = OmptKernelTimingArgsAsyncTy{
1828-
Device, OutputSignal, TicksToTime, std::move(OmptInfoData)};
1835+
OmptKernelTimingArgsAsync = ProfilingInfoTy{
1836+
Device, OutputSignal, TicksToTime, std::move(OMPTData)};
18291837
return Plugin::success();
18301838
}
18311839
#endif
@@ -2180,7 +2188,7 @@ struct AMDGPUStreamTy {
21802188
#ifdef OMPT_SUPPORT
21812189
static Error timeKernelInNsAsync(void *Data) {
21822190
assert(Data && "Invalid data pointer in OMPT profiling");
2183-
auto Args = getOmptTimingsArgs(Data);
2191+
auto Args = getProfilingInfo(Data);
21842192

21852193
assert(Args && "Invalid args pointer in OMPT profiling");
21862194
auto [StartTime, EndTime] = getKernelStartAndEndTime(Args);
@@ -2189,12 +2197,11 @@ struct AMDGPUStreamTy {
21892197
"End %lu\n",
21902198
StartTime, EndTime);
21912199

2192-
assert(Args->OmptEventInfo && "Invalid OEI pointer in OMPT profiling");
2193-
auto OmptEventInfo = *Args->OmptEventInfo;
2200+
auto OmptEventInfo = Args->ProfilerSpecificData.get();
21942201

2195-
assert(OmptEventInfo.TraceRecord && "Invalid TraceRecord");
2202+
assert(OmptEventInfo->TraceRecord && "Invalid TraceRecord");
21962203
llvm::omp::target::ompt::RegionInterface.stopTargetSubmitTraceAsync(
2197-
OmptEventInfo.TraceRecord, OmptEventInfo.NumTeams, StartTime, EndTime);
2204+
OmptEventInfo->TraceRecord, OmptEventInfo->NumTeams, StartTime, EndTime);
21982205

21992206
return Plugin::success();
22002207
}
@@ -2451,11 +2458,11 @@ struct AMDGPUStreamTy {
24512458
/// the pinned host buffer. Both operations are asynchronous and dependent.
24522459
/// The intermediate pinned buffer will be released to the specified memory
24532460
/// manager once the operation completes.
2454-
Error pushMemoryCopyH2DAsync(void *Dst, const void *Src, void *Inter,
2455-
uint64_t CopySize,
2456-
AMDGPUMemoryManagerTy &MemoryManager,
2457-
std::unique_ptr<ompt::OmptEventInfoTy> OmptInfo = nullptr,
2458-
size_t NumTimes = 1) {
2461+
Error pushMemoryCopyH2DAsync(
2462+
void *Dst, const void *Src, void *Inter, uint64_t CopySize,
2463+
AMDGPUMemoryManagerTy &MemoryManager,
2464+
std::unique_ptr<ompt::OmptEventInfoTy> OmptInfo = nullptr,
2465+
size_t NumTimes = 1) {
24592466
// Retrieve available signals for the operation's outputs.
24602467
AMDGPUSignalTy *OutputSignals[2] = {};
24612468
if (auto Err = SignalManager.getResources(/*Num=*/2, OutputSignals))
@@ -3815,7 +3822,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
38153822

38163823
#ifdef OMPT_SUPPORT
38173824
if (LocalOmptEventInfo) {
3818-
OmptKernelTimingArgsAsyncTy OmptKernelTimingArgsAsync{
3825+
ProfilingInfoTy OmptKernelTimingArgsAsync{
38193826
this, &Signal, TicksToTime, std::move(LocalOmptEventInfo)};
38203827
if (auto Err = timeDataTransferInNsAsync(&OmptKernelTimingArgsAsync))
38213828
return Err;
@@ -3909,7 +3916,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
39093916

39103917
#ifdef OMPT_SUPPORT
39113918
if (LocalOmptEventInfo) {
3912-
OmptKernelTimingArgsAsyncTy OmptKernelTimingArgsAsync{
3919+
ProfilingInfoTy OmptKernelTimingArgsAsync{
39133920
this, &Signal, TicksToTime, std::move(LocalOmptEventInfo)};
39143921
if (auto Err = timeDataTransferInNsAsync(&OmptKernelTimingArgsAsync))
39153922
return Err;
@@ -3967,7 +3974,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
39673974

39683975
#ifdef OMPT_SUPPORT
39693976
if (LocalOmptEventInfo) {
3970-
OmptKernelTimingArgsAsyncTy OmptKernelTimingArgsAsync{
3977+
ProfilingInfoTy OmptKernelTimingArgsAsync{
39713978
this, &Signal, TicksToTime, std::move(LocalOmptEventInfo)};
39723979
if (auto Err = timeDataTransferInNsAsync(&OmptKernelTimingArgsAsync))
39733980
return Err;
@@ -5807,19 +5814,16 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
58075814

58085815
#ifdef OMPT_SUPPORT
58095816
/// Casts and validates the OMPT-related info passed to the action function.
5810-
static OmptKernelTimingArgsAsyncTy *getOmptTimingsArgs(void *Data) {
5811-
OmptKernelTimingArgsAsyncTy *Args =
5812-
reinterpret_cast<OmptKernelTimingArgsAsyncTy *>(Data);
5817+
static ProfilingInfoTy *getProfilingInfo(void *Data) {
5818+
ProfilingInfoTy *Args = reinterpret_cast<ProfilingInfoTy *>(Data);
58135819
assert(Args && "Invalid argument pointer");
58145820
assert(Args->Device && "Invalid device");
58155821
assert(Args->Signal && "Invalid signal");
5816-
assert(Args->OmptEventInfo && "Invalid OMPT Async data (nullptr)");
5817-
assert(Args->OmptEventInfo->TraceRecord && "Invalid Trace Record Pointer");
58185822
return Args;
58195823
}
58205824

58215825
static std::pair<uint64_t, uint64_t>
5822-
getKernelStartAndEndTime(const OmptKernelTimingArgsAsyncTy *Args) {
5826+
getKernelStartAndEndTime(const ProfilingInfoTy *Args) {
58235827
assert(Args->Device && "Invalid GenericDevice Pointer in OMPT profiling");
58245828
assert(Args->Signal && "Invalid AMDGPUSignal Pointer in OMPT profiling");
58255829
auto *AMDGPUDevice = reinterpret_cast<AMDGPUDeviceTy *>(Args->Device);
@@ -5841,7 +5845,7 @@ getKernelStartAndEndTime(const OmptKernelTimingArgsAsyncTy *Args) {
58415845
}
58425846

58435847
static std::pair<uint64_t, uint64_t>
5844-
getCopyStartAndEndTime(const OmptKernelTimingArgsAsyncTy *Args) {
5848+
getCopyStartAndEndTime(const ProfilingInfoTy *Args) {
58455849
assert(Args->Device && "Invalid GenericDevice Pointer in OMPT profiling");
58465850
assert(Args->Signal && "Invalid AMDGPUSignal Pointer in OMPT profiling");
58475851
hsa_amd_profiling_async_copy_time_t TimeRec{0, 0};

0 commit comments

Comments
 (0)