@@ -160,6 +160,13 @@ static Error timeDataTransferInNsAsync(void *Data) {
160160
161161 return Plugin::success ();
162162}
163+
/// Fetch the opaque profiler payload attached to an async-info object.
///
/// \param AsyncInfoWrapper wrapper that converts to a `__tgt_async_info *`.
/// \returns the `ProfilerData` member of the underlying async info, or
///          nullptr when the wrapper yields no async info. The pointer is
///          returned untyped; this helper neither inspects nor casts it.
164+ static void *
165+ getOrNullProfilerSpecificData (AsyncInfoWrapperTy &AsyncInfoWrapper) {
// Relies on the wrapper's conversion to the raw async-info pointer.
166+ __tgt_async_info *AI = AsyncInfoWrapper;
// A null async info means there is nothing to profile against.
167+ return AI ? AI->ProfilerData : nullptr ;
168+ }
169+
163170} // namespace plugin
164171} // namespace target
165172} // namespace omp
@@ -197,40 +204,6 @@ static double getTimeOfDay() {
197204 return TimeVal;
198205}
199206
200- #ifdef OMPT_SUPPORT
201- namespace llvm ::omp::target::plugin {
202- // / Returns a pointer to an OmptEventInfoTy object to be used for OMPT tracing
203- // / or nullptr. It is the caller's duty to free the returned pointer when no
204- // / longer needed.
205- static ompt::OmptEventInfoTy *
206- getOrNullOmptEventInfo (AsyncInfoWrapperTy &AsyncInfoWrapper) {
207- __tgt_async_info *AI = AsyncInfoWrapper;
208- if (!AI || !AI->ProfilerData )
209- return nullptr ;
210-
211- // The profiler data is allocated in the profiler for each individual event.
212- return reinterpret_cast <ompt::OmptEventInfoTy *>(AI->ProfilerData );
213- }
214-
215- } // namespace llvm::omp::target::plugin
216-
217- #else // OMPT_SUPPORT
218- namespace llvm ::omp::target::ompt {
219-
220- struct OmptEventInfoTy {};
221- } // namespace llvm::omp::target::ompt
222-
223- namespace llvm ::omp::target::plugin {
224-
225- // / When no OMPT is enabled, return nullptr to de-facto disable the profiling
226- static ompt::OmptEventInfoTy *
227- getOrNullOmptEventInfo (AsyncInfoWrapperTy &AsyncInfoWrapper) {
228- return nullptr ;
229- }
230-
231- } // namespace llvm::omp::target::plugin
232- #endif
233-
234207namespace llvm {
235208namespace omp {
236209namespace target {
@@ -1444,7 +1417,7 @@ struct AMDGPUQueueTy {
14441417 hsa_status_t Status =
14451418 hsa_queue_create (Agent, QueueSize, HSA_QUEUE_TYPE_MULTI, callbackError,
14461419 &Device, UINT32_MAX, UINT32_MAX, &Queue);
1447- if (Device.Plugin .getProfiler ()->shouldEnableProfiling () ||
1420+ if (Device.Plugin .getProfiler ()->isProfilingEnabled () ||
14481421 OMPX_EnableQueueProfiling)
14491422 hsa_amd_profiling_set_profiler_enabled (Queue, /* Enable=*/ 1 );
14501423
@@ -1810,10 +1783,11 @@ struct AMDGPUStreamTy {
18101783 Error schedProfilerKernelTiming (GenericDeviceTy *Device, hsa_agent_t Agent,
18111784 AMDGPUSignalTy *OutputSignal,
18121785 double TicksToTime,
1813- ompt::OmptEventInfoTy *OmptData ) {
1786+ void *ProfilerSpecificData ) {
// Register timeKernelInNsAsync as the callback to run for this entry.
18141787 Callbacks.emplace_back (timeKernelInNsAsync);
// Append the argument record for that callback: the device's plugin, the
// agent, the output signal, TicksToTime and the opaque profiler payload.
// NOTE(review): Callbacks and ActionArgs presumably stay index-aligned, so
// both must grow together here — confirm against the code that runs them.
1815- ActionArgs.emplace_back ().ProfilerArgs = ProfilingInfoTy{
1816- &(Device->Plugin ), Agent, OutputSignal, TicksToTime, OmptData};
1788+ ActionArgs.emplace_back ().ProfilerArgs =
1789+ ProfilingInfoTy{&(Device->Plugin ), Agent, OutputSignal, TicksToTime,
1790+ ProfilerSpecificData};
// No failure path in this function; it always reports success.
18171791 return Plugin::success ();
18181792 }
18191793
@@ -1822,10 +1796,11 @@ struct AMDGPUStreamTy {
18221796 hsa_agent_t Agent,
18231797 AMDGPUSignalTy *OutputSignal,
18241798 double TicksToTime,
1825- ompt::OmptEventInfoTy *OmptData ) {
1799+ void *ProfilerSpecificData ) {
18261800 Callbacks.emplace_back (timeDataTransferInNsAsync);
1827- ActionArgs.emplace_back ().ProfilerArgs = ProfilingInfoTy{
1828- &(Device->Plugin ), Agent, OutputSignal, TicksToTime, OmptData};
1801+ ActionArgs.emplace_back ().ProfilerArgs =
1802+ ProfilingInfoTy{&(Device->Plugin ), Agent, OutputSignal, TicksToTime,
1803+ ProfilerSpecificData};
18291804 return Plugin::success ();
18301805 }
18311806
@@ -2200,7 +2175,7 @@ struct AMDGPUStreamTy {
22002175 uint32_t NumThreads[3 ], uint32_t NumBlocks[3 ],
22012176 uint32_t GroupSize, uint32_t StackSize,
22022177 AMDGPUMemoryManagerTy &MemoryManager,
2203- ompt::OmptEventInfoTy *OmptInfo = nullptr ) {
2178+ void *ProfilerSpecificData = nullptr ) {
22042179 if (Queue == nullptr )
22052180 return Plugin::error (ErrorCode::INVALID_NULL_POINTER,
22062181 " target queue was nullptr" );
@@ -2223,13 +2198,12 @@ struct AMDGPUStreamTy {
22232198
22242199 // TODO: Technically this conditional compilation is not needed anymore
22252200#ifdef OMPT_SUPPORT
2226- if (OmptInfo) {
2227- DP (" OMPT-Async: Info in KernelTy >> TR ptr: %p\n " , OmptInfo->TraceRecord );
2201+ if (ProfilerSpecificData) {
22282202
2229- // OmptInfo holds function pointer to finish trace record once the kernel
2230- // completed.
2203+ // ProfilerSpecificData holds function pointer to finish trace record once
2204+ // the kernel completed.
22312205 if (auto Err = Slots[Curr].schedProfilerKernelTiming (
2232- &Device, Agent, OutputSignal, TicksToTime, OmptInfo ))
2206+ &Device, Agent, OutputSignal, TicksToTime, ProfilerSpecificData ))
22332207 return Err;
22342208 }
22352209#endif
@@ -2302,7 +2276,7 @@ struct AMDGPUStreamTy {
23022276
23032277 // / Push an asynchronous memory copy between pinned memory buffers.
23042278 Error pushPinnedMemoryCopyAsync (void *Dst, const void *Src, uint64_t CopySize,
2305- ompt::OmptEventInfoTy *OmptInfo = nullptr ) {
2279+ void *ProfilerSpecificData = nullptr ) {
23062280 // Retrieve an available signal for the operation's output.
23072281 AMDGPUSignalTy *OutputSignal = nullptr ;
23082282 if (auto Err = SignalManager.getResource (OutputSignal))
@@ -2317,11 +2291,10 @@ struct AMDGPUStreamTy {
23172291
23182292 // TODO: Technically this conditional compilation is not needed anymore
23192293#ifdef OMPT_SUPPORT
2320- if (OmptInfo) {
2321- DP (" OMPT-Async: Registering data timing in pushPinnedMemoryCopyAsync\n " );
2294+ if (ProfilerSpecificData) {
23222295 // Capture the time required for the pinned-memory data transfer.
23232296 if (auto Err = Slots[Curr].schedProfilerDataTransferTiming (
2324- &Device, Agent, OutputSignal, TicksToTime, OmptInfo ))
2297+ &Device, Agent, OutputSignal, TicksToTime, ProfilerSpecificData ))
23252298 return Err;
23262299 }
23272300#endif
@@ -2348,7 +2321,7 @@ struct AMDGPUStreamTy {
23482321 Error pushMemoryCopyD2HAsync (void *Dst, const void *Src, void *Inter,
23492322 uint64_t CopySize,
23502323 AMDGPUMemoryManagerTy &MemoryManager,
2351- ompt::OmptEventInfoTy *OmptInfo = nullptr ) {
2324+ void *ProfilerSpecificData = nullptr ) {
23522325 // Retrieve available signals for the operation's outputs.
23532326 AMDGPUSignalTy *OutputSignals[2 ] = {};
23542327 if (auto Err = SignalManager.getResources (/* Num=*/ 2 , OutputSignals))
@@ -2374,11 +2347,11 @@ struct AMDGPUStreamTy {
23742347
23752348 // TODO: Technically this conditional compilation is not needed anymore
23762349#ifdef OMPT_SUPPORT
2377- if (OmptInfo) {
2378- DP (" OMPT-Async: Registering data timing in pushMemoryCopyD2HAsync\n " );
2350+ if (ProfilerSpecificData) {
23792351 // Capture the time the data transfer required for the d2h transfer.
23802352 if (auto Err = Slots[Curr].schedProfilerDataTransferTiming (
2381- &Device, Agent, OutputSignals[0 ], TicksToTime, OmptInfo))
2353+ &Device, Agent, OutputSignals[0 ], TicksToTime,
2354+ ProfilerSpecificData))
23822355 return Err;
23832356 }
23842357#endif
@@ -2435,7 +2408,7 @@ struct AMDGPUStreamTy {
24352408 Error pushMemoryCopyH2DAsync (void *Dst, const void *Src, void *Inter,
24362409 uint64_t CopySize,
24372410 AMDGPUMemoryManagerTy &MemoryManager,
2438- ompt::OmptEventInfoTy *OmptInfo = nullptr ,
2411+ void *ProfilerSpecificData = nullptr ,
24392412 size_t NumTimes = 1 ) {
24402413 // Retrieve available signals for the operation's outputs.
24412414 AMDGPUSignalTy *OutputSignals[2 ] = {};
@@ -2498,11 +2471,11 @@ struct AMDGPUStreamTy {
24982471
24992472 // TODO: Technically, this conditional compilation is not needed anymore
25002473#ifdef OMPT_SUPPORT
2501- if (OmptInfo) {
2502- DP (" OMPT-Async: Registering data timing in pushMemoryCopyH2DAsync\n " );
2474+ if (ProfilerSpecificData) {
25032475 // Capture the time required for the h2d data transfer.
25042476 if (auto Err = Slots[Curr].schedProfilerDataTransferTiming (
2505- &Device, Agent, OutputSignals[0 ], TicksToTime, OmptInfo))
2477+ &Device, Agent, OutputSignals[0 ], TicksToTime,
2478+ ProfilerSpecificData))
25062479 return Err;
25072480 }
25082481#endif
@@ -2523,7 +2496,7 @@ struct AMDGPUStreamTy {
25232496 // AMDGPUDeviceTy is incomplete here, passing the underlying agent instead
25242497 Error pushMemoryCopyD2DAsync (void *Dst, hsa_agent_t DstAgent, const void *Src,
25252498 hsa_agent_t SrcAgent, uint64_t CopySize,
2526- ompt::OmptEventInfoTy *OmptInfo = nullptr ) {
2499+ void *ProfilerSpecificData = nullptr ) {
25272500 AMDGPUSignalTy *OutputSignal;
25282501 if (auto Err = SignalManager.getResources (/* Num=*/ 1 , &OutputSignal))
25292502 return Err;
@@ -2537,11 +2510,10 @@ struct AMDGPUStreamTy {
25372510
25382511 // TODO: Technically, this conditional compilation is not needed anymore
25392512#ifdef OMPT_SUPPORT
2540- if (OmptInfo) {
2541- DP (" OMPT-Async: Registering data timing in pushMemoryCopyD2DAsync\n " );
2513+ if (ProfilerSpecificData) {
25422514 // Capture the time required for the d2d data transfer.
25432515 if (auto Err = Slots[Curr].schedProfilerDataTransferTiming (
2544- &Device, Agent, OutputSignal, TicksToTime, OmptInfo ))
2516+ &Device, Agent, OutputSignal, TicksToTime, ProfilerSpecificData ))
25452517 return Err;
25462518 }
25472519#endif
@@ -3746,8 +3718,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
37463718 void *PinnedPtr = nullptr ;
37473719
37483720 // Obtain the profiler-specific data attached to the async info, if any
3749- DP (" OMPT-Async: dataSubmitImpl\n " );
3750- auto LocalOmptEventInfo = getOrNullOmptEventInfo (AsyncInfoWrapper);
3721+ auto ProfilerSpecificData = getOrNullProfilerSpecificData (AsyncInfoWrapper);
37513722
37523723 // Prefault GPU page table in XNACK-Enabled case, on APUs,
37533724 // under the assumption that explicitly allocated memory
@@ -3763,9 +3734,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
37633734 PinnedAllocs.getDeviceAccessiblePtrFromPinnedBuffer (HstPtr)) {
37643735 if (auto Err = getStream (AsyncInfoWrapper, Stream))
37653736 return Err;
3766- DP (" OMPT-Async: Pinned Copy\n " );
37673737 return Stream->pushPinnedMemoryCopyAsync (TgtPtr, PinnedPtr, Size,
3768- LocalOmptEventInfo );
3738+ ProfilerSpecificData );
37693739 }
37703740
37713741 // For large transfers use synchronous behavior.
@@ -3793,7 +3763,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
37933763 if (auto Err = Signal.init ())
37943764 return Err;
37953765
3796- DP (" OMPT-Async: Sync Copy\n " );
37973766 if (auto Err = hsa_utils::asyncMemCopy (useMultipleSdmaEngines (), TgtPtr,
37983767 Agent, PinnedPtr, Agent, Size, 0 ,
37993768 nullptr , Signal.get ()))
@@ -3803,9 +3772,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
38033772 return Err;
38043773
38053774#ifdef OMPT_SUPPORT
3806- if (LocalOmptEventInfo ) {
3775+ if (Plugin. getProfiler ()-> isProfilingEnabled () ) {
38073776 ProfilingInfoTy OmptKernelTimingArgsAsync{
3808- &Plugin, Agent, &Signal, TicksToTime, LocalOmptEventInfo };
3777+ &Plugin, Agent, &Signal, TicksToTime, ProfilerSpecificData };
38093778
38103779 if (auto Err = timeDataTransferInNsAsync (&OmptKernelTimingArgsAsync))
38113780 return Err;
@@ -3828,10 +3797,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
38283797 if (auto Err = getStream (AsyncInfoWrapper, Stream))
38293798 return Err;
38303799
3831- DP (" OMPT-Async: ASync Copy\n " );
38323800 return Stream->pushMemoryCopyH2DAsync (TgtPtr, HstPtr, PinnedPtr, Size,
38333801 PinnedMemoryManager,
3834- LocalOmptEventInfo );
3802+ ProfilerSpecificData );
38353803 }
38363804
38373805 // / Retrieve data from the device (device to host transfer).
@@ -3841,8 +3809,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
38413809 void *PinnedPtr = nullptr ;
38423810
38433811 // Obtain the profiler-specific data attached to the async info, if any
3844- DP (" OMPT-Async: dataRetrieveImpl\n " );
3845- auto LocalOmptEventInfo = getOrNullOmptEventInfo (AsyncInfoWrapper);
3812+ auto ProfilerSpecificData = getOrNullProfilerSpecificData (AsyncInfoWrapper);
38463813
38473814 // Prefault GPU page table in XNACK-Enabled case, on APUs,
38483815 // under the assumption that explicitly allocated memory
@@ -3858,9 +3825,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
38583825 PinnedAllocs.getDeviceAccessiblePtrFromPinnedBuffer (HstPtr)) {
38593826 if (auto Err = getStream (AsyncInfoWrapper, Stream))
38603827 return Err;
3861- DP (" OMPT-Async: Pinned Copy\n " );
38623828 return Stream->pushPinnedMemoryCopyAsync (PinnedPtr, TgtPtr, Size,
3863- LocalOmptEventInfo );
3829+ ProfilerSpecificData );
38643830 }
38653831
38663832 // For large transfers use synchronous behavior.
@@ -3898,9 +3864,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
38983864 return Err;
38993865
39003866#ifdef OMPT_SUPPORT
3901- if (LocalOmptEventInfo ) {
3867+ if (Plugin. getProfiler ()-> isProfilingEnabled () ) {
39023868 ProfilingInfoTy OmptKernelTimingArgsAsync{
3903- &Plugin, Agent, &Signal, TicksToTime, LocalOmptEventInfo };
3869+ &Plugin, Agent, &Signal, TicksToTime, ProfilerSpecificData };
39043870
39053871 if (auto Err = timeDataTransferInNsAsync (&OmptKernelTimingArgsAsync))
39063872 return Err;
@@ -3925,7 +3891,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
39253891
39263892 return Stream->pushMemoryCopyD2HAsync (HstPtr, TgtPtr, PinnedPtr, Size,
39273893 PinnedMemoryManager,
3928- LocalOmptEventInfo );
3894+ ProfilerSpecificData );
39293895 }
39303896
39313897 // / Exchange data between two devices within the plugin.
@@ -3934,8 +3900,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
39343900 AsyncInfoWrapperTy &AsyncInfoWrapper) override {
39353901 AMDGPUDeviceTy &DstDevice = static_cast <AMDGPUDeviceTy &>(DstGenericDevice);
39363902
3937- DP (" OMPT-Async: dataExchangeImpl\n " );
3938- auto LocalOmptEventInfo = getOrNullOmptEventInfo (AsyncInfoWrapper);
3903+ auto ProfilerSpecificData = getOrNullProfilerSpecificData (AsyncInfoWrapper);
39393904
39403905 // For large transfers use synchronous behavior.
39413906 // If OMPT is enabled or synchronous behavior is explicitly requested:
@@ -3957,9 +3922,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
39573922 return Err;
39583923
39593924#ifdef OMPT_SUPPORT
3960- if (LocalOmptEventInfo ) {
3925+ if (Plugin. getProfiler ()-> isProfilingEnabled () ) {
39613926 ProfilingInfoTy OmptKernelTimingArgsAsync{
3962- &Plugin, Agent, &Signal, TicksToTime, LocalOmptEventInfo };
3927+ &Plugin, Agent, &Signal, TicksToTime, ProfilerSpecificData };
39633928
39643929 if (auto Err = timeDataTransferInNsAsync (&OmptKernelTimingArgsAsync))
39653930 return Err;
@@ -3977,7 +3942,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
39773942
39783943 return Stream->pushMemoryCopyD2DAsync (DstPtr, DstDevice.getAgent (), SrcPtr,
39793944 getAgent (), (uint64_t )Size,
3980- LocalOmptEventInfo );
3945+ ProfilerSpecificData );
39813946 }
39823947
39833948 // / Insert a data fence between previous data operations and the following
@@ -5601,12 +5566,12 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
56015566 }
56025567
56035568 // Get the profiler-specific data attached to the async info, if any
5604- auto LocalOmptEventInfo = getOrNullOmptEventInfo (AsyncInfoWrapper);
5569+ auto ProfilerSpecificData = getOrNullProfilerSpecificData (AsyncInfoWrapper);
56055570
56065571 // Push the kernel launch into the stream.
56075572 return Stream->pushKernelLaunch (*this , AllArgs, NumThreads, NumBlocks,
56085573 GroupSize, static_cast <uint32_t >(StackSize),
5609- ArgsMemoryManager, LocalOmptEventInfo );
5574+ ArgsMemoryManager, ProfilerSpecificData );
56105575}
56115576
56125577void AMDGPUKernelTy::printAMDOneLineKernelTrace (GenericDeviceTy &GenericDevice,
@@ -5847,8 +5812,8 @@ static ProfilingInfoTy *getProfilingInfo(void *Data) {
58475812
58485813static std::pair<uint64_t , uint64_t >
58495814getKernelStartAndEndTime (const ProfilingInfoTy *Args) {
5850- assert (Args->Plugin && " Invalid GenericPlugin pointer in OMPT profiling" );
5851- assert (Args->Signal && " Invalid AMDGPUSignal pointer in OMPT profiling" );
5815+ assert (Args->Plugin && " Invalid GenericPlugin pointer in profiling" );
5816+ assert (Args->Signal && " Invalid AMDGPUSignal pointer in profiling" );
58525817
58535818 hsa_amd_profiling_dispatch_time_t TimeRec{0 , 0 };
58545819 hsa_status_t Status = hsa_amd_profiling_get_dispatch_time (
@@ -5872,7 +5837,7 @@ getKernelStartAndEndTime(const ProfilingInfoTy *Args) {
58725837
58735838static std::pair<uint64_t , uint64_t >
58745839getCopyStartAndEndTime (const ProfilingInfoTy *Args) {
5875- assert (Args->Signal && " Invalid AMDGPUSignal Pointer in OMPT profiling" );
5840+ assert (Args->Signal && " Invalid AMDGPUSignal Pointer in profiling" );
58765841
58775842 hsa_amd_profiling_async_copy_time_t TimeRec{0 , 0 };
58785843 hsa_status_t Status =
0 commit comments