@@ -138,7 +138,7 @@ namespace plugin {
138
138
struct AMDGPUSignalTy ;
139
139
/// Used to transport information to OMPT timing functions.
140
140
struct OmptKernelTimingArgsAsyncTy {
141
- hsa_agent_t Agent ;
141
+ GenericDeviceTy *Device ;
142
142
AMDGPUSignalTy *Signal;
143
143
double TicksToTime;
144
144
std::unique_ptr<ompt::OmptEventInfoTy> OmptEventInfo;
@@ -1773,21 +1773,22 @@ struct AMDGPUStreamTy {
1773
1773
#ifdef OMPT_SUPPORT
1774
1774
// / Schedule OMPT kernel timing on the slot.
1775
1775
Error schedOmptAsyncKernelTiming (
1776
- hsa_agent_t Agent , AMDGPUSignalTy *OutputSignal, double TicksToTime ,
1777
- std::unique_ptr<ompt::OmptEventInfoTy> OMPTData) {
1776
+ GenericDeviceTy *Device , AMDGPUSignalTy *OutputSignal,
1777
+ double TicksToTime, std::unique_ptr<ompt::OmptEventInfoTy> OMPTData) {
1778
1778
OmptActionFunction = timeKernelInNsAsync;
1779
1779
OmptKernelTimingArgsAsync = OmptKernelTimingArgsAsyncTy{
1780
- Agent , OutputSignal, TicksToTime, std::move (OMPTData)};
1780
+ Device , OutputSignal, TicksToTime, std::move (OMPTData)};
1781
1781
return Plugin::success ();
1782
1782
}
1783
1783
1784
1784
/// Schedule OMPT data transfer timing on the slot.
///
/// Sets the slot's OmptActionFunction to timeDataTransferInNsAsync and stores
/// the device, the output signal, the tick-to-time factor and the OMPT event
/// info (ownership is moved in) in OmptKernelTimingArgsAsync.
///
/// Despite the "D2H" in its name, the H2D and D2D copy paths
/// (pushMemoryCopyH2DAsync, pushMemoryCopyD2DAsync) call this as well.
///
/// \returns Plugin::success() unconditionally; nothing in this function fails.
1785
1785
Error schedOmptAsyncD2HTransferTiming (
1786
- hsa_agent_t Agent, AMDGPUSignalTy *OutputSignal, double TicksToTime,
1786
+ GenericDeviceTy *Device, AMDGPUSignalTy *OutputSignal,
1787
+ double TicksToTime,
1787
1788
std::unique_ptr<ompt::OmptEventInfoTy> OmptInfoData) {
1788
1789
OmptActionFunction = timeDataTransferInNsAsync;
1789
1790
OmptKernelTimingArgsAsync = OmptKernelTimingArgsAsyncTy{
1790
- Agent , OutputSignal, TicksToTime, std::move (OmptInfoData)};
1791
+ Device , OutputSignal, TicksToTime, std::move (OmptInfoData)};
1791
1792
return Plugin::success ();
1792
1793
}
1793
1794
#endif
@@ -2158,7 +2159,7 @@ struct AMDGPUStreamTy {
2158
2159
// OmptInfo holds function pointer to finish trace record once the kernel
2159
2160
// completed.
2160
2161
if (auto Err = Slots[Curr].schedOmptAsyncKernelTiming (
2161
- Agent , OutputSignal, TicksToTime, std::move (OmptInfo)))
2162
+ &Device , OutputSignal, TicksToTime, std::move (OmptInfo)))
2162
2163
return Err;
2163
2164
}
2164
2165
#endif
@@ -2235,7 +2236,7 @@ struct AMDGPUStreamTy {
2235
2236
DP (" OMPT-Async: Registering data timing in pushPinnedMemoryCopyAsync\n " );
2236
2237
// Capture the time the data transfer required for the d2h transfer.
2237
2238
if (auto Err = Slots[Curr].schedOmptAsyncD2HTransferTiming (
2238
- Agent , OutputSignal, TicksToTime, std::move (OmptInfo)))
2239
+ &Device , OutputSignal, TicksToTime, std::move (OmptInfo)))
2239
2240
return Err;
2240
2241
}
2241
2242
#endif
@@ -2291,7 +2292,7 @@ struct AMDGPUStreamTy {
2291
2292
DP (" OMPT-Async: Registering data timing in pushMemoryCopyD2HAsync\n " );
2292
2293
// Capture the time the data transfer required for the d2h transfer.
2293
2294
if (auto Err = Slots[Curr].schedOmptAsyncD2HTransferTiming (
2294
- Agent , OutputSignals[0 ], TicksToTime, std::move (OmptInfo)))
2295
+ &Device , OutputSignals[0 ], TicksToTime, std::move (OmptInfo)))
2295
2296
return Err;
2296
2297
}
2297
2298
#endif
@@ -2408,7 +2409,7 @@ struct AMDGPUStreamTy {
2408
2409
DP (" OMPT-Async: Registering data timing in pushMemoryCopyH2DAsync\n " );
2409
2410
// Capture the time required for the h2d data transfer.
2410
2411
if (auto Err = Slots[Curr].schedOmptAsyncD2HTransferTiming (
2411
- Agent , OutputSignals[0 ], TicksToTime, std::move (OmptInfo)))
2412
+ &Device , OutputSignals[0 ], TicksToTime, std::move (OmptInfo)))
2412
2413
return Err;
2413
2414
}
2414
2415
#endif
@@ -2447,7 +2448,7 @@ struct AMDGPUStreamTy {
2447
2448
DP (" OMPT-Async: Registering data timing in pushMemoryCopyD2DAsync\n " );
2448
2449
// Capture the time required for the d2d data transfer.
2449
2450
if (auto Err = Slots[Curr].schedOmptAsyncD2HTransferTiming (
2450
- Agent , OutputSignal, TicksToTime, std::move (OmptInfo)))
2451
+ &Device , OutputSignal, TicksToTime, std::move (OmptInfo)))
2451
2452
return Err;
2452
2453
}
2453
2454
#endif
@@ -3605,7 +3606,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
3605
3606
#ifdef OMPT_SUPPORT
3606
3607
if (LocalOmptEventInfo) {
3607
3608
OmptKernelTimingArgsAsyncTy OmptKernelTimingArgsAsync{
3608
- Agent , &Signal, TicksToTime, std::move (LocalOmptEventInfo)};
3609
+ this , &Signal, TicksToTime, std::move (LocalOmptEventInfo)};
3609
3610
if (auto Err = timeDataTransferInNsAsync (&OmptKernelTimingArgsAsync))
3610
3611
return Err;
3611
3612
}
@@ -3691,7 +3692,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
3691
3692
#ifdef OMPT_SUPPORT
3692
3693
if (LocalOmptEventInfo) {
3693
3694
OmptKernelTimingArgsAsyncTy OmptKernelTimingArgsAsync{
3694
- Agent , &Signal, TicksToTime, std::move (LocalOmptEventInfo)};
3695
+ this , &Signal, TicksToTime, std::move (LocalOmptEventInfo)};
3695
3696
if (auto Err = timeDataTransferInNsAsync (&OmptKernelTimingArgsAsync))
3696
3697
return Err;
3697
3698
}
@@ -3749,7 +3750,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
3749
3750
#ifdef OMPT_SUPPORT
3750
3751
if (LocalOmptEventInfo) {
3751
3752
OmptKernelTimingArgsAsyncTy OmptKernelTimingArgsAsync{
3752
- Agent , &Signal, TicksToTime, std::move (LocalOmptEventInfo)};
3753
+ this , &Signal, TicksToTime, std::move (LocalOmptEventInfo)};
3753
3754
if (auto Err = timeDataTransferInNsAsync (&OmptKernelTimingArgsAsync))
3754
3755
return Err;
3755
3756
}
@@ -4268,6 +4269,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
4268
4269
return Size <= OMPX_SharedDescriptorMaxSize;
4269
4270
}
4270
4271
4272
/// Returns the value of OMPX_StrictSanityChecks. The OMPT timing helpers
/// query this after a failed timestamp lookup to decide whether to hard-stop
/// via llvm_unreachable instead of only printing a warning.
+ bool useStrictSanityChecks () const { return OMPX_StrictSanityChecks; }
4273
+
4271
4274
private:
4272
4275
using AMDGPUEventRef = AMDGPUResourceRef<AMDGPUEventTy>;
4273
4276
using AMDGPUEventManagerTy = GenericDeviceResourceManagerTy<AMDGPUEventRef>;
@@ -5346,6 +5349,7 @@ static OmptKernelTimingArgsAsyncTy *getOmptTimingsArgs(void *Data) {
5346
5349
OmptKernelTimingArgsAsyncTy *Args =
5347
5350
reinterpret_cast <OmptKernelTimingArgsAsyncTy *>(Data);
5348
5351
assert (Args && " Invalid argument pointer" );
5352
+ assert (Args->Device && " Invalid device" );
5349
5353
assert (Args->Signal && " Invalid signal" );
5350
5354
assert (Args->OmptEventInfo && " Invalid OMPT Async data (nullptr)" );
5351
5355
assert (Args->OmptEventInfo ->TraceRecord && " Invalid Trace Record Pointer" );
@@ -5354,17 +5358,17 @@ static OmptKernelTimingArgsAsyncTy *getOmptTimingsArgs(void *Data) {
5354
5358
5355
5359
static std::pair<uint64_t , uint64_t >
5356
5360
getKernelStartAndEndTime (const OmptKernelTimingArgsAsyncTy *Args) {
5361
+ assert (Args->Device && " Invalid GenericDevice Pointer in OMPT profiling" );
5357
5362
assert (Args->Signal && " Invalid AMDGPUSignal Pointer in OMPT profiling" );
5363
+ auto *AMDGPUDevice = reinterpret_cast <AMDGPUDeviceTy *>(Args->Device );
5358
5364
hsa_amd_profiling_dispatch_time_t TimeRec{0 , 0 };
5359
5365
hsa_status_t Status = hsa_amd_profiling_get_dispatch_time (
5360
- Args-> Agent , Args->Signal ->get (), &TimeRec);
5366
+ AMDGPUDevice-> getAgent () , Args->Signal ->get (), &TimeRec);
5361
5367
if (auto Err = Plugin::check (
5362
5368
Status,
5363
5369
" WARNING Could not retrieve kernel dispatch timestamps: %s" )) {
5364
5370
MESSAGE0 (toString (std::move (Err)).data ());
5365
- static BoolEnvar OMPX_StrictSanityChecks{" OMPX_STRICT_SANITY_CHECKS" ,
5366
- false };
5367
- if (OMPX_StrictSanityChecks)
5371
+ if (AMDGPUDevice->useStrictSanityChecks ())
5368
5372
llvm_unreachable (" User-requested hard stop on sanity check errors." );
5369
5373
}
5370
5374
@@ -5376,16 +5380,16 @@ getKernelStartAndEndTime(const OmptKernelTimingArgsAsyncTy *Args) {
5376
5380
5377
5381
static std::pair<uint64_t , uint64_t >
5378
5382
getCopyStartAndEndTime (const OmptKernelTimingArgsAsyncTy *Args) {
5383
+ assert (Args->Device && " Invalid GenericDevice Pointer in OMPT profiling" );
5379
5384
assert (Args->Signal && " Invalid AMDGPUSignal Pointer in OMPT profiling" );
5380
5385
hsa_amd_profiling_async_copy_time_t TimeRec{0 , 0 };
5381
5386
hsa_status_t Status =
5382
5387
hsa_amd_profiling_get_async_copy_time (Args->Signal ->get (), &TimeRec);
5383
5388
if (auto Err = Plugin::check (
5384
5389
Status, " WARNING Could not retrieve data-copy timestamps: %s" )) {
5385
5390
MESSAGE0 (toString (std::move (Err)).data ());
5386
- static BoolEnvar OMPX_StrictSanityChecks{" OMPX_STRICT_SANITY_CHECKS" ,
5387
- false };
5388
- if (OMPX_StrictSanityChecks)
5391
+ auto *AMDGPUDevice = reinterpret_cast <AMDGPUDeviceTy *>(Args->Device );
5392
+ if (AMDGPUDevice->useStrictSanityChecks ())
5389
5393
llvm_unreachable (" User-requested hard stop on sanity check errors." );
5390
5394
}
5391
5395
0 commit comments