@@ -142,35 +142,41 @@ namespace plugin {
142142
143143struct AMDGPUSignalTy ;
144144// / Used to transport information to OMPT timing functions.
145- struct OmptKernelTimingArgsAsyncTy {
145+ struct ProfilingInfoTy {
146146 GenericDeviceTy *Device;
147147 AMDGPUSignalTy *Signal;
148148 double TicksToTime;
149- std::unique_ptr<ompt::OmptEventInfoTy> OmptEventInfo;
149+ // FIXME: OMPT specific.
150+ std::unique_ptr<ompt::OmptEventInfoTy> ProfilerSpecificData;
150151};
151152
152- // / Get OmptKernelTimingArgsAsyncTy from the void * used in the action
153+ // / Get ProfilingInfoTy from the void * used in the action
153154// / functions.
154- static OmptKernelTimingArgsAsyncTy * getOmptTimingsArgs (void *Data);
155+ static ProfilingInfoTy * getProfilingInfo (void *Data);
155156
156157// / Returns the pair of <start, end> time for a kernel
157158static std::pair<uint64_t , uint64_t >
158- getKernelStartAndEndTime (const OmptKernelTimingArgsAsyncTy *Args);
159+ getKernelStartAndEndTime (const ProfilingInfoTy *Args);
159160
160161// / Returns the pair of <start, end> time for a data transfer
161162static std::pair<uint64_t , uint64_t >
162- getCopyStartAndEndTime (const OmptKernelTimingArgsAsyncTy *Args);
163+ getCopyStartAndEndTime (const ProfilingInfoTy *Args);
163164
164165// / Obtain the timing info and call the RegionInterface callback for the
165166// / asynchronous trace records.
166167static Error timeDataTransferInNsAsync (void *Data) {
167- auto Args = getOmptTimingsArgs (Data);
168+ auto Args = getProfilingInfo (Data);
168169
169170 auto [Start, End] = getCopyStartAndEndTime (Args);
170171
171- auto OmptEventInfo = *Args->OmptEventInfo .get ();
172+ // / XXX: For now this must not happen, as this is only called in the OMPT code path.
173+ assert (Args->ProfilerSpecificData &&
174+ " ProfilerSpecificData was null when profiler enabled" );
175+
176+ // XXX: Is unique_ptr memory freed after leaving this scope? (No: .get() only yields a non-owning pointer; Args keeps ownership.)
177+ auto OmptEventInfo = Args->ProfilerSpecificData .get ();
172178 llvm::omp::target::ompt::RegionInterface.stopTargetDataMovementTraceAsync (
173- OmptEventInfo. TraceRecord , Start, End);
179+ OmptEventInfo-> TraceRecord , Start, End);
174180
175181 return Plugin::success ();
176182}
@@ -192,11 +198,12 @@ getOrNullOmptEventInfo(AsyncInfoWrapperTy &AsyncInfoWrapper) {
192198
193199 // We need to copy the content of the ProfilerData object to persist it
194200 // between multiple async operations.
195- auto LocalOmptEventInfo = std::make_unique<ompt::OmptEventInfoTy>(
196- * reinterpret_cast <ompt::OmptEventInfoTy *>(AI-> ProfilerData ));
197- // printOmptEventInfoTy(* AI->ProfilerData);
201+ // TODO: This is OMPT specific right now
202+ auto LocalOmptEventInfo =
203+ reinterpret_cast <ompt::OmptEventInfoTy *>( AI->ProfilerData );
198204 printOmptEventInfoTy (*LocalOmptEventInfo);
199- return LocalOmptEventInfo;
205+
206+ return std::make_unique<ompt::OmptEventInfoTy>(*LocalOmptEventInfo);
200207}
201208
202209} // namespace plugin
@@ -263,6 +270,8 @@ namespace llvm::omp::target::ompt {
263270struct OmptEventInfoTy {};
264271} // namespace llvm::omp::target::ompt
265272namespace llvm ::omp::target::plugin {
273+
274+ // / When OMPT is not enabled, return nullptr to de facto disable profiling.
266275static std::unique_ptr<ompt::OmptEventInfoTy>
267276getOrNullOmptEventInfo (AsyncInfoWrapperTy &AsyncInfoWrapper) {
268277 return nullptr ;
@@ -1762,7 +1771,7 @@ struct AMDGPUStreamTy {
17621771#ifdef OMPT_SUPPORT
17631772 // / Space for the OMPT action's arguments. A pointer to these arguments is
17641773 // / passed to the action function.
1765- OmptKernelTimingArgsAsyncTy OmptKernelTimingArgsAsync;
1774+ ProfilingInfoTy OmptKernelTimingArgsAsync;
17661775#endif
17671776
17681777 // / Create an empty slot.
@@ -1813,19 +1822,18 @@ struct AMDGPUStreamTy {
18131822 GenericDeviceTy *Device, AMDGPUSignalTy *OutputSignal,
18141823 double TicksToTime, std::unique_ptr<ompt::OmptEventInfoTy> OMPTData) {
18151824 OmptActionFunction = timeKernelInNsAsync;
1816- OmptKernelTimingArgsAsync = OmptKernelTimingArgsAsyncTy {
1825+ OmptKernelTimingArgsAsync = ProfilingInfoTy {
18171826 Device, OutputSignal, TicksToTime, std::move (OMPTData)};
18181827 return Plugin::success ();
18191828 }
18201829
18211830 // / Schedule OMPT data transfer timing on the slot
18221831 Error schedOmptAsyncD2HTransferTiming (
18231832 GenericDeviceTy *Device, AMDGPUSignalTy *OutputSignal,
1824- double TicksToTime,
1825- std::unique_ptr<ompt::OmptEventInfoTy> OmptInfoData) {
1833+ double TicksToTime, std::unique_ptr<ompt::OmptEventInfoTy> OMPTData) {
18261834 OmptActionFunction = timeDataTransferInNsAsync;
1827- OmptKernelTimingArgsAsync = OmptKernelTimingArgsAsyncTy {
1828- Device, OutputSignal, TicksToTime, std::move (OmptInfoData )};
1835+ OmptKernelTimingArgsAsync = ProfilingInfoTy {
1836+ Device, OutputSignal, TicksToTime, std::move (OMPTData )};
18291837 return Plugin::success ();
18301838 }
18311839#endif
@@ -2180,7 +2188,7 @@ struct AMDGPUStreamTy {
21802188#ifdef OMPT_SUPPORT
21812189 static Error timeKernelInNsAsync (void *Data) {
21822190 assert (Data && " Invalid data pointer in OMPT profiling" );
2183- auto Args = getOmptTimingsArgs (Data);
2191+ auto Args = getProfilingInfo (Data);
21842192
21852193 assert (Args && " Invalid args pointer in OMPT profiling" );
21862194 auto [StartTime, EndTime] = getKernelStartAndEndTime (Args);
@@ -2189,12 +2197,11 @@ struct AMDGPUStreamTy {
21892197 " End %lu\n " ,
21902198 StartTime, EndTime);
21912199
2192- assert (Args->OmptEventInfo && " Invalid OEI pointer in OMPT profiling" );
2193- auto OmptEventInfo = *Args->OmptEventInfo ;
2200+ auto OmptEventInfo = Args->ProfilerSpecificData .get ();
21942201
2195- assert (OmptEventInfo. TraceRecord && " Invalid TraceRecord" );
2202+ assert (OmptEventInfo-> TraceRecord && " Invalid TraceRecord" );
21962203 llvm::omp::target::ompt::RegionInterface.stopTargetSubmitTraceAsync (
2197- OmptEventInfo. TraceRecord , OmptEventInfo. NumTeams , StartTime, EndTime);
2204+ OmptEventInfo-> TraceRecord , OmptEventInfo-> NumTeams , StartTime, EndTime);
21982205
21992206 return Plugin::success ();
22002207 }
@@ -2451,11 +2458,11 @@ struct AMDGPUStreamTy {
24512458 // / the pinned host buffer. Both operations are asynchronous and dependent.
24522459 // / The intermediate pinned buffer will be released to the specified memory
24532460 // / manager once the operation completes.
2454- Error pushMemoryCopyH2DAsync (void *Dst, const void *Src, void *Inter,
2455- uint64_t CopySize,
2456- AMDGPUMemoryManagerTy &MemoryManager,
2457- std::unique_ptr<ompt::OmptEventInfoTy> OmptInfo = nullptr ,
2458- size_t NumTimes = 1 ) {
2461+ Error pushMemoryCopyH2DAsync (
2462+ void *Dst, const void *Src, void *Inter, uint64_t CopySize,
2463+ AMDGPUMemoryManagerTy &MemoryManager,
2464+ std::unique_ptr<ompt::OmptEventInfoTy> OmptInfo = nullptr ,
2465+ size_t NumTimes = 1 ) {
24592466 // Retrieve available signals for the operation's outputs.
24602467 AMDGPUSignalTy *OutputSignals[2 ] = {};
24612468 if (auto Err = SignalManager.getResources (/* Num=*/ 2 , OutputSignals))
@@ -3815,7 +3822,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
38153822
38163823#ifdef OMPT_SUPPORT
38173824 if (LocalOmptEventInfo) {
3818- OmptKernelTimingArgsAsyncTy OmptKernelTimingArgsAsync{
3825+ ProfilingInfoTy OmptKernelTimingArgsAsync{
38193826 this , &Signal, TicksToTime, std::move (LocalOmptEventInfo)};
38203827 if (auto Err = timeDataTransferInNsAsync (&OmptKernelTimingArgsAsync))
38213828 return Err;
@@ -3909,7 +3916,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
39093916
39103917#ifdef OMPT_SUPPORT
39113918 if (LocalOmptEventInfo) {
3912- OmptKernelTimingArgsAsyncTy OmptKernelTimingArgsAsync{
3919+ ProfilingInfoTy OmptKernelTimingArgsAsync{
39133920 this , &Signal, TicksToTime, std::move (LocalOmptEventInfo)};
39143921 if (auto Err = timeDataTransferInNsAsync (&OmptKernelTimingArgsAsync))
39153922 return Err;
@@ -3967,7 +3974,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
39673974
39683975#ifdef OMPT_SUPPORT
39693976 if (LocalOmptEventInfo) {
3970- OmptKernelTimingArgsAsyncTy OmptKernelTimingArgsAsync{
3977+ ProfilingInfoTy OmptKernelTimingArgsAsync{
39713978 this , &Signal, TicksToTime, std::move (LocalOmptEventInfo)};
39723979 if (auto Err = timeDataTransferInNsAsync (&OmptKernelTimingArgsAsync))
39733980 return Err;
@@ -5807,19 +5814,16 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
58075814
58085815#ifdef OMPT_SUPPORT
58095816// / Casts and validates the OMPT-related info passed to the action function.
5810- static OmptKernelTimingArgsAsyncTy *getOmptTimingsArgs (void *Data) {
5811- OmptKernelTimingArgsAsyncTy *Args =
5812- reinterpret_cast <OmptKernelTimingArgsAsyncTy *>(Data);
5817+ static ProfilingInfoTy *getProfilingInfo (void *Data) {
5818+ ProfilingInfoTy *Args = reinterpret_cast <ProfilingInfoTy *>(Data);
58135819 assert (Args && " Invalid argument pointer" );
58145820 assert (Args->Device && " Invalid device" );
58155821 assert (Args->Signal && " Invalid signal" );
5816- assert (Args->OmptEventInfo && " Invalid OMPT Async data (nullptr)" );
5817- assert (Args->OmptEventInfo ->TraceRecord && " Invalid Trace Record Pointer" );
58185822 return Args;
58195823}
58205824
58215825static std::pair<uint64_t , uint64_t >
5822- getKernelStartAndEndTime (const OmptKernelTimingArgsAsyncTy *Args) {
5826+ getKernelStartAndEndTime (const ProfilingInfoTy *Args) {
58235827 assert (Args->Device && " Invalid GenericDevice Pointer in OMPT profiling" );
58245828 assert (Args->Signal && " Invalid AMDGPUSignal Pointer in OMPT profiling" );
58255829 auto *AMDGPUDevice = reinterpret_cast <AMDGPUDeviceTy *>(Args->Device );
@@ -5841,7 +5845,7 @@ getKernelStartAndEndTime(const OmptKernelTimingArgsAsyncTy *Args) {
58415845}
58425846
58435847static std::pair<uint64_t , uint64_t >
5844- getCopyStartAndEndTime (const OmptKernelTimingArgsAsyncTy *Args) {
5848+ getCopyStartAndEndTime (const ProfilingInfoTy *Args) {
58455849 assert (Args->Device && " Invalid GenericDevice Pointer in OMPT profiling" );
58465850 assert (Args->Signal && " Invalid AMDGPUSignal Pointer in OMPT profiling" );
58475851 hsa_amd_profiling_async_copy_time_t TimeRec{0 , 0 };
0 commit comments