Skip to content

Commit b81a29f

Browse files
authored
[offload][OMPT][amdgpu] Async timing sanity check follow up (llvm#1463)
2 parents bad2908 + 65e43a7 commit b81a29f

File tree

1 file changed

+25
-21
lines changed
  • offload/plugins-nextgen/amdgpu/src

1 file changed

+25
-21
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ namespace plugin {
138138
struct AMDGPUSignalTy;
139139
/// Used to transport information to OMPT timing functions.
140140
struct OmptKernelTimingArgsAsyncTy {
141-
hsa_agent_t Agent;
141+
GenericDeviceTy *Device;
142142
AMDGPUSignalTy *Signal;
143143
double TicksToTime;
144144
std::unique_ptr<ompt::OmptEventInfoTy> OmptEventInfo;
@@ -1773,21 +1773,22 @@ struct AMDGPUStreamTy {
17731773
#ifdef OMPT_SUPPORT
17741774
/// Schedule OMPT kernel timing on the slot.
17751775
Error schedOmptAsyncKernelTiming(
1776-
hsa_agent_t Agent, AMDGPUSignalTy *OutputSignal, double TicksToTime,
1777-
std::unique_ptr<ompt::OmptEventInfoTy> OMPTData) {
1776+
GenericDeviceTy *Device, AMDGPUSignalTy *OutputSignal,
1777+
double TicksToTime, std::unique_ptr<ompt::OmptEventInfoTy> OMPTData) {
17781778
OmptActionFunction = timeKernelInNsAsync;
17791779
OmptKernelTimingArgsAsync = OmptKernelTimingArgsAsyncTy{
1780-
Agent, OutputSignal, TicksToTime, std::move(OMPTData)};
1780+
Device, OutputSignal, TicksToTime, std::move(OMPTData)};
17811781
return Plugin::success();
17821782
}
17831783

17841784
/// Schedule OMPT data transfer timing on the slot
17851785
Error schedOmptAsyncD2HTransferTiming(
1786-
hsa_agent_t Agent, AMDGPUSignalTy *OutputSignal, double TicksToTime,
1786+
GenericDeviceTy *Device, AMDGPUSignalTy *OutputSignal,
1787+
double TicksToTime,
17871788
std::unique_ptr<ompt::OmptEventInfoTy> OmptInfoData) {
17881789
OmptActionFunction = timeDataTransferInNsAsync;
17891790
OmptKernelTimingArgsAsync = OmptKernelTimingArgsAsyncTy{
1790-
Agent, OutputSignal, TicksToTime, std::move(OmptInfoData)};
1791+
Device, OutputSignal, TicksToTime, std::move(OmptInfoData)};
17911792
return Plugin::success();
17921793
}
17931794
#endif
@@ -2158,7 +2159,7 @@ struct AMDGPUStreamTy {
21582159
// OmptInfo holds function pointer to finish trace record once the kernel
21592160
// completed.
21602161
if (auto Err = Slots[Curr].schedOmptAsyncKernelTiming(
2161-
Agent, OutputSignal, TicksToTime, std::move(OmptInfo)))
2162+
&Device, OutputSignal, TicksToTime, std::move(OmptInfo)))
21622163
return Err;
21632164
}
21642165
#endif
@@ -2235,7 +2236,7 @@ struct AMDGPUStreamTy {
22352236
DP("OMPT-Async: Registering data timing in pushPinnedMemoryCopyAsync\n");
22362237
// Capture the time the data transfer required for the d2h transfer.
22372238
if (auto Err = Slots[Curr].schedOmptAsyncD2HTransferTiming(
2238-
Agent, OutputSignal, TicksToTime, std::move(OmptInfo)))
2239+
&Device, OutputSignal, TicksToTime, std::move(OmptInfo)))
22392240
return Err;
22402241
}
22412242
#endif
@@ -2291,7 +2292,7 @@ struct AMDGPUStreamTy {
22912292
DP("OMPT-Async: Registering data timing in pushMemoryCopyD2HAsync\n");
22922293
// Capture the time the data transfer required for the d2h transfer.
22932294
if (auto Err = Slots[Curr].schedOmptAsyncD2HTransferTiming(
2294-
Agent, OutputSignals[0], TicksToTime, std::move(OmptInfo)))
2295+
&Device, OutputSignals[0], TicksToTime, std::move(OmptInfo)))
22952296
return Err;
22962297
}
22972298
#endif
@@ -2408,7 +2409,7 @@ struct AMDGPUStreamTy {
24082409
DP("OMPT-Async: Registering data timing in pushMemoryCopyH2DAsync\n");
24092410
// Capture the time the data transfer required for the h2d transfer.
24102411
if (auto Err = Slots[Curr].schedOmptAsyncD2HTransferTiming(
2411-
Agent, OutputSignals[0], TicksToTime, std::move(OmptInfo)))
2412+
&Device, OutputSignals[0], TicksToTime, std::move(OmptInfo)))
24122413
return Err;
24132414
}
24142415
#endif
@@ -2447,7 +2448,7 @@ struct AMDGPUStreamTy {
24472448
DP("OMPT-Async: Registering data timing in pushMemoryCopyD2DAsync\n");
24482449
// Capture the time the data transfer required for the d2d transfer.
24492450
if (auto Err = Slots[Curr].schedOmptAsyncD2HTransferTiming(
2450-
Agent, OutputSignal, TicksToTime, std::move(OmptInfo)))
2451+
&Device, OutputSignal, TicksToTime, std::move(OmptInfo)))
24512452
return Err;
24522453
}
24532454
#endif
@@ -3605,7 +3606,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
36053606
#ifdef OMPT_SUPPORT
36063607
if (LocalOmptEventInfo) {
36073608
OmptKernelTimingArgsAsyncTy OmptKernelTimingArgsAsync{
3608-
Agent, &Signal, TicksToTime, std::move(LocalOmptEventInfo)};
3609+
this, &Signal, TicksToTime, std::move(LocalOmptEventInfo)};
36093610
if (auto Err = timeDataTransferInNsAsync(&OmptKernelTimingArgsAsync))
36103611
return Err;
36113612
}
@@ -3691,7 +3692,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
36913692
#ifdef OMPT_SUPPORT
36923693
if (LocalOmptEventInfo) {
36933694
OmptKernelTimingArgsAsyncTy OmptKernelTimingArgsAsync{
3694-
Agent, &Signal, TicksToTime, std::move(LocalOmptEventInfo)};
3695+
this, &Signal, TicksToTime, std::move(LocalOmptEventInfo)};
36953696
if (auto Err = timeDataTransferInNsAsync(&OmptKernelTimingArgsAsync))
36963697
return Err;
36973698
}
@@ -3749,7 +3750,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
37493750
#ifdef OMPT_SUPPORT
37503751
if (LocalOmptEventInfo) {
37513752
OmptKernelTimingArgsAsyncTy OmptKernelTimingArgsAsync{
3752-
Agent, &Signal, TicksToTime, std::move(LocalOmptEventInfo)};
3753+
this, &Signal, TicksToTime, std::move(LocalOmptEventInfo)};
37533754
if (auto Err = timeDataTransferInNsAsync(&OmptKernelTimingArgsAsync))
37543755
return Err;
37553756
}
@@ -4268,6 +4269,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
42684269
return Size <= OMPX_SharedDescriptorMaxSize;
42694270
}
42704271

4272+
bool useStrictSanityChecks() const { return OMPX_StrictSanityChecks; }
4273+
42714274
private:
42724275
using AMDGPUEventRef = AMDGPUResourceRef<AMDGPUEventTy>;
42734276
using AMDGPUEventManagerTy = GenericDeviceResourceManagerTy<AMDGPUEventRef>;
@@ -5346,6 +5349,7 @@ static OmptKernelTimingArgsAsyncTy *getOmptTimingsArgs(void *Data) {
53465349
OmptKernelTimingArgsAsyncTy *Args =
53475350
reinterpret_cast<OmptKernelTimingArgsAsyncTy *>(Data);
53485351
assert(Args && "Invalid argument pointer");
5352+
assert(Args->Device && "Invalid device");
53495353
assert(Args->Signal && "Invalid signal");
53505354
assert(Args->OmptEventInfo && "Invalid OMPT Async data (nullptr)");
53515355
assert(Args->OmptEventInfo->TraceRecord && "Invalid Trace Record Pointer");
@@ -5354,17 +5358,17 @@ static OmptKernelTimingArgsAsyncTy *getOmptTimingsArgs(void *Data) {
53545358

53555359
static std::pair<uint64_t, uint64_t>
53565360
getKernelStartAndEndTime(const OmptKernelTimingArgsAsyncTy *Args) {
5361+
assert(Args->Device && "Invalid GenericDevice Pointer in OMPT profiling");
53575362
assert(Args->Signal && "Invalid AMDGPUSignal Pointer in OMPT profiling");
5363+
auto *AMDGPUDevice = reinterpret_cast<AMDGPUDeviceTy *>(Args->Device);
53585364
hsa_amd_profiling_dispatch_time_t TimeRec{0, 0};
53595365
hsa_status_t Status = hsa_amd_profiling_get_dispatch_time(
5360-
Args->Agent, Args->Signal->get(), &TimeRec);
5366+
AMDGPUDevice->getAgent(), Args->Signal->get(), &TimeRec);
53615367
if (auto Err = Plugin::check(
53625368
Status,
53635369
"WARNING Could not retrieve kernel dispatch timestamps: %s")) {
53645370
MESSAGE0(toString(std::move(Err)).data());
5365-
static BoolEnvar OMPX_StrictSanityChecks{"OMPX_STRICT_SANITY_CHECKS",
5366-
false};
5367-
if (OMPX_StrictSanityChecks)
5371+
if (AMDGPUDevice->useStrictSanityChecks())
53685372
llvm_unreachable("User-requested hard stop on sanity check errors.");
53695373
}
53705374

@@ -5376,16 +5380,16 @@ getKernelStartAndEndTime(const OmptKernelTimingArgsAsyncTy *Args) {
53765380

53775381
static std::pair<uint64_t, uint64_t>
53785382
getCopyStartAndEndTime(const OmptKernelTimingArgsAsyncTy *Args) {
5383+
assert(Args->Device && "Invalid GenericDevice Pointer in OMPT profiling");
53795384
assert(Args->Signal && "Invalid AMDGPUSignal Pointer in OMPT profiling");
53805385
hsa_amd_profiling_async_copy_time_t TimeRec{0, 0};
53815386
hsa_status_t Status =
53825387
hsa_amd_profiling_get_async_copy_time(Args->Signal->get(), &TimeRec);
53835388
if (auto Err = Plugin::check(
53845389
Status, "WARNING Could not retrieve data-copy timestamps: %s")) {
53855390
MESSAGE0(toString(std::move(Err)).data());
5386-
static BoolEnvar OMPX_StrictSanityChecks{"OMPX_STRICT_SANITY_CHECKS",
5387-
false};
5388-
if (OMPX_StrictSanityChecks)
5391+
auto *AMDGPUDevice = reinterpret_cast<AMDGPUDeviceTy *>(Args->Device);
5392+
if (AMDGPUDevice->useStrictSanityChecks())
53895393
llvm_unreachable("User-requested hard stop on sanity check errors.");
53905394
}
53915395

0 commit comments

Comments
 (0)