@@ -138,7 +138,7 @@ namespace plugin {
138138struct AMDGPUSignalTy ;
139139/// Used to transport information to OMPT timing functions.
140140struct OmptKernelTimingArgsAsyncTy {
141- hsa_agent_t Agent ;
141+ GenericDeviceTy *Device ;
142142 AMDGPUSignalTy *Signal;
143143 double TicksToTime;
144144 std::unique_ptr<ompt::OmptEventInfoTy> OmptEventInfo;
@@ -1773,21 +1773,22 @@ struct AMDGPUStreamTy {
17731773#ifdef OMPT_SUPPORT
17741774 // / Schedule OMPT kernel timing on the slot.
17751775 Error schedOmptAsyncKernelTiming (
1776- hsa_agent_t Agent , AMDGPUSignalTy *OutputSignal, double TicksToTime ,
1777- std::unique_ptr<ompt::OmptEventInfoTy> OMPTData) {
1776+ GenericDeviceTy *Device , AMDGPUSignalTy *OutputSignal,
1777+ double TicksToTime, std::unique_ptr<ompt::OmptEventInfoTy> OMPTData) {
17781778 OmptActionFunction = timeKernelInNsAsync;
17791779 OmptKernelTimingArgsAsync = OmptKernelTimingArgsAsyncTy{
1780- Agent , OutputSignal, TicksToTime, std::move (OMPTData)};
1780+ Device , OutputSignal, TicksToTime, std::move (OMPTData)};
17811781 return Plugin::success ();
17821782 }
17831783
17841784 // / Schedule OMPT data transfer timing on the slot
17851785 Error schedOmptAsyncD2HTransferTiming (
1786- hsa_agent_t Agent, AMDGPUSignalTy *OutputSignal, double TicksToTime,
1786+ GenericDeviceTy *Device, AMDGPUSignalTy *OutputSignal,
1787+ double TicksToTime,
17871788 std::unique_ptr<ompt::OmptEventInfoTy> OmptInfoData) {
17881789 OmptActionFunction = timeDataTransferInNsAsync;
17891790 OmptKernelTimingArgsAsync = OmptKernelTimingArgsAsyncTy{
1790- Agent , OutputSignal, TicksToTime, std::move (OmptInfoData)};
1791+ Device , OutputSignal, TicksToTime, std::move (OmptInfoData)};
17911792 return Plugin::success ();
17921793 }
17931794#endif
@@ -2158,7 +2159,7 @@ struct AMDGPUStreamTy {
21582159 // OmptInfo holds function pointer to finish trace record once the kernel
21592160 // completed.
21602161 if (auto Err = Slots[Curr].schedOmptAsyncKernelTiming (
2161- Agent , OutputSignal, TicksToTime, std::move (OmptInfo)))
2162+ &Device , OutputSignal, TicksToTime, std::move (OmptInfo)))
21622163 return Err;
21632164 }
21642165#endif
@@ -2235,7 +2236,7 @@ struct AMDGPUStreamTy {
22352236 DP (" OMPT-Async: Registering data timing in pushPinnedMemoryCopyAsync\n " );
22362237 // Capture the time the data transfer required for the d2h transfer.
22372238 if (auto Err = Slots[Curr].schedOmptAsyncD2HTransferTiming (
2238- Agent , OutputSignal, TicksToTime, std::move (OmptInfo)))
2239+ &Device , OutputSignal, TicksToTime, std::move (OmptInfo)))
22392240 return Err;
22402241 }
22412242#endif
@@ -2291,7 +2292,7 @@ struct AMDGPUStreamTy {
22912292 DP (" OMPT-Async: Registering data timing in pushMemoryCopyD2HAsync\n " );
22922293 // Capture the time the data transfer required for the d2h transfer.
22932294 if (auto Err = Slots[Curr].schedOmptAsyncD2HTransferTiming (
2294- Agent , OutputSignals[0 ], TicksToTime, std::move (OmptInfo)))
2295+ &Device , OutputSignals[0 ], TicksToTime, std::move (OmptInfo)))
22952296 return Err;
22962297 }
22972298#endif
@@ -2408,7 +2409,7 @@ struct AMDGPUStreamTy {
24082409 DP (" OMPT-Async: Registering data timing in pushMemoryCopyH2DAsync\n " );
24092410 // Capture the time the data transfer required for the h2d transfer.
24102411 if (auto Err = Slots[Curr].schedOmptAsyncD2HTransferTiming (
2411- Agent , OutputSignals[0 ], TicksToTime, std::move (OmptInfo)))
2412+ &Device , OutputSignals[0 ], TicksToTime, std::move (OmptInfo)))
24122413 return Err;
24132414 }
24142415#endif
@@ -2447,7 +2448,7 @@ struct AMDGPUStreamTy {
24472448 DP (" OMPT-Async: Registering data timing in pushMemoryCopyD2DAsync\n " );
24482449 // Capture the time the data transfer required for the d2d transfer.
24492450 if (auto Err = Slots[Curr].schedOmptAsyncD2HTransferTiming (
2450- Agent , OutputSignal, TicksToTime, std::move (OmptInfo)))
2451+ &Device , OutputSignal, TicksToTime, std::move (OmptInfo)))
24512452 return Err;
24522453 }
24532454#endif
@@ -3605,7 +3606,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
36053606#ifdef OMPT_SUPPORT
36063607 if (LocalOmptEventInfo) {
36073608 OmptKernelTimingArgsAsyncTy OmptKernelTimingArgsAsync{
3608- Agent , &Signal, TicksToTime, std::move (LocalOmptEventInfo)};
3609+ this , &Signal, TicksToTime, std::move (LocalOmptEventInfo)};
36093610 if (auto Err = timeDataTransferInNsAsync (&OmptKernelTimingArgsAsync))
36103611 return Err;
36113612 }
@@ -3691,7 +3692,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
36913692#ifdef OMPT_SUPPORT
36923693 if (LocalOmptEventInfo) {
36933694 OmptKernelTimingArgsAsyncTy OmptKernelTimingArgsAsync{
3694- Agent , &Signal, TicksToTime, std::move (LocalOmptEventInfo)};
3695+ this , &Signal, TicksToTime, std::move (LocalOmptEventInfo)};
36953696 if (auto Err = timeDataTransferInNsAsync (&OmptKernelTimingArgsAsync))
36963697 return Err;
36973698 }
@@ -3749,7 +3750,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
37493750#ifdef OMPT_SUPPORT
37503751 if (LocalOmptEventInfo) {
37513752 OmptKernelTimingArgsAsyncTy OmptKernelTimingArgsAsync{
3752- Agent , &Signal, TicksToTime, std::move (LocalOmptEventInfo)};
3753+ this , &Signal, TicksToTime, std::move (LocalOmptEventInfo)};
37533754 if (auto Err = timeDataTransferInNsAsync (&OmptKernelTimingArgsAsync))
37543755 return Err;
37553756 }
@@ -4268,6 +4269,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
42684269 return Size <= OMPX_SharedDescriptorMaxSize;
42694270 }
42704271
4272+ bool useStrictSanityChecks () const { return OMPX_StrictSanityChecks; }
4273+
42714274private:
42724275 using AMDGPUEventRef = AMDGPUResourceRef<AMDGPUEventTy>;
42734276 using AMDGPUEventManagerTy = GenericDeviceResourceManagerTy<AMDGPUEventRef>;
@@ -5346,6 +5349,7 @@ static OmptKernelTimingArgsAsyncTy *getOmptTimingsArgs(void *Data) {
53465349 OmptKernelTimingArgsAsyncTy *Args =
53475350 reinterpret_cast <OmptKernelTimingArgsAsyncTy *>(Data);
53485351 assert (Args && " Invalid argument pointer" );
5352+ assert (Args->Device && " Invalid device" );
53495353 assert (Args->Signal && " Invalid signal" );
53505354 assert (Args->OmptEventInfo && " Invalid OMPT Async data (nullptr)" );
53515355 assert (Args->OmptEventInfo ->TraceRecord && " Invalid Trace Record Pointer" );
@@ -5354,17 +5358,17 @@ static OmptKernelTimingArgsAsyncTy *getOmptTimingsArgs(void *Data) {
53545358
53555359static std::pair<uint64_t , uint64_t >
53565360getKernelStartAndEndTime (const OmptKernelTimingArgsAsyncTy *Args) {
5361+ assert (Args->Device && " Invalid GenericDevice Pointer in OMPT profiling" );
53575362 assert (Args->Signal && " Invalid AMDGPUSignal Pointer in OMPT profiling" );
5363+ auto *AMDGPUDevice = reinterpret_cast <AMDGPUDeviceTy *>(Args->Device );
53585364 hsa_amd_profiling_dispatch_time_t TimeRec{0 , 0 };
53595365 hsa_status_t Status = hsa_amd_profiling_get_dispatch_time (
5360- Args-> Agent , Args->Signal ->get (), &TimeRec);
5366+ AMDGPUDevice-> getAgent () , Args->Signal ->get (), &TimeRec);
53615367 if (auto Err = Plugin::check (
53625368 Status,
53635369 " WARNING Could not retrieve kernel dispatch timestamps: %s" )) {
53645370 MESSAGE0 (toString (std::move (Err)).data ());
5365- static BoolEnvar OMPX_StrictSanityChecks{" OMPX_STRICT_SANITY_CHECKS" ,
5366- false };
5367- if (OMPX_StrictSanityChecks)
5371+ if (AMDGPUDevice->useStrictSanityChecks ())
53685372 llvm_unreachable (" User-requested hard stop on sanity check errors." );
53695373 }
53705374
@@ -5376,16 +5380,16 @@ getKernelStartAndEndTime(const OmptKernelTimingArgsAsyncTy *Args) {
53765380
53775381static std::pair<uint64_t , uint64_t >
53785382getCopyStartAndEndTime (const OmptKernelTimingArgsAsyncTy *Args) {
5383+ assert (Args->Device && " Invalid GenericDevice Pointer in OMPT profiling" );
53795384 assert (Args->Signal && " Invalid AMDGPUSignal Pointer in OMPT profiling" );
53805385 hsa_amd_profiling_async_copy_time_t TimeRec{0 , 0 };
53815386 hsa_status_t Status =
53825387 hsa_amd_profiling_get_async_copy_time (Args->Signal ->get (), &TimeRec);
53835388 if (auto Err = Plugin::check (
53845389 Status, " WARNING Could not retrieve data-copy timestamps: %s" )) {
53855390 MESSAGE0 (toString (std::move (Err)).data ());
5386- static BoolEnvar OMPX_StrictSanityChecks{" OMPX_STRICT_SANITY_CHECKS" ,
5387- false };
5388- if (OMPX_StrictSanityChecks)
5391+ auto *AMDGPUDevice = reinterpret_cast <AMDGPUDeviceTy *>(Args->Device );
5392+ if (AMDGPUDevice->useStrictSanityChecks ())
53895393 llvm_unreachable (" User-requested hard stop on sanity check errors." );
53905394 }
53915395
0 commit comments