Skip to content

Commit 7cee817

Browse files
authored
[Offload][OMPT] De-type profiler data in API (#340)
The data type used in many API functions was still OMPT-specific. This patch changes the type to void * to remove this OMPT-specific data type from the interfaces.
1 parent 6f44ba8 commit 7cee817

File tree

4 files changed

+59
-94
lines changed

4 files changed

+59
-94
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 54 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,13 @@ static Error timeDataTransferInNsAsync(void *Data) {
160160

161161
return Plugin::success();
162162
}
163+
164+
static void *
165+
getOrNullProfilerSpecificData(AsyncInfoWrapperTy &AsyncInfoWrapper) {
166+
__tgt_async_info *AI = AsyncInfoWrapper;
167+
return AI ? AI->ProfilerData : nullptr;
168+
}
169+
163170
} // namespace plugin
164171
} // namespace target
165172
} // namespace omp
@@ -197,40 +204,6 @@ static double getTimeOfDay() {
197204
return TimeVal;
198205
}
199206

200-
#ifdef OMPT_SUPPORT
201-
namespace llvm::omp::target::plugin {
202-
/// Returns a pointer to an OmptEventInfoTy object to be used for OMPT tracing
203-
/// or nullptr. It is the caller's duty to free the returned pointer when no
204-
/// longer needed.
205-
static ompt::OmptEventInfoTy *
206-
getOrNullOmptEventInfo(AsyncInfoWrapperTy &AsyncInfoWrapper) {
207-
__tgt_async_info *AI = AsyncInfoWrapper;
208-
if (!AI || !AI->ProfilerData)
209-
return nullptr;
210-
211-
// The profiler data is allocated in the profiler for each individual event.
212-
return reinterpret_cast<ompt::OmptEventInfoTy *>(AI->ProfilerData);
213-
}
214-
215-
} // namespace llvm::omp::target::plugin
216-
217-
#else // OMPT_SUPPORT
218-
namespace llvm::omp::target::ompt {
219-
220-
struct OmptEventInfoTy {};
221-
} // namespace llvm::omp::target::ompt
222-
223-
namespace llvm::omp::target::plugin {
224-
225-
/// When no OMPT is enabled, return nullptr to de-facto disable the profiling
226-
static ompt::OmptEventInfoTy *
227-
getOrNullOmptEventInfo(AsyncInfoWrapperTy &AsyncInfoWrapper) {
228-
return nullptr;
229-
}
230-
231-
} // namespace llvm::omp::target::plugin
232-
#endif
233-
234207
namespace llvm {
235208
namespace omp {
236209
namespace target {
@@ -1444,7 +1417,7 @@ struct AMDGPUQueueTy {
14441417
hsa_status_t Status =
14451418
hsa_queue_create(Agent, QueueSize, HSA_QUEUE_TYPE_MULTI, callbackError,
14461419
&Device, UINT32_MAX, UINT32_MAX, &Queue);
1447-
if (Device.Plugin.getProfiler()->shouldEnableProfiling() ||
1420+
if (Device.Plugin.getProfiler()->isProfilingEnabled() ||
14481421
OMPX_EnableQueueProfiling)
14491422
hsa_amd_profiling_set_profiler_enabled(Queue, /*Enable=*/1);
14501423

@@ -1810,10 +1783,11 @@ struct AMDGPUStreamTy {
18101783
Error schedProfilerKernelTiming(GenericDeviceTy *Device, hsa_agent_t Agent,
18111784
AMDGPUSignalTy *OutputSignal,
18121785
double TicksToTime,
1813-
ompt::OmptEventInfoTy *OmptData) {
1786+
void *ProfilerSpecificData) {
18141787
Callbacks.emplace_back(timeKernelInNsAsync);
1815-
ActionArgs.emplace_back().ProfilerArgs = ProfilingInfoTy{
1816-
&(Device->Plugin), Agent, OutputSignal, TicksToTime, OmptData};
1788+
ActionArgs.emplace_back().ProfilerArgs =
1789+
ProfilingInfoTy{&(Device->Plugin), Agent, OutputSignal, TicksToTime,
1790+
ProfilerSpecificData};
18171791
return Plugin::success();
18181792
}
18191793

@@ -1822,10 +1796,11 @@ struct AMDGPUStreamTy {
18221796
hsa_agent_t Agent,
18231797
AMDGPUSignalTy *OutputSignal,
18241798
double TicksToTime,
1825-
ompt::OmptEventInfoTy *OmptData) {
1799+
void *ProfilerSpecificData) {
18261800
Callbacks.emplace_back(timeDataTransferInNsAsync);
1827-
ActionArgs.emplace_back().ProfilerArgs = ProfilingInfoTy{
1828-
&(Device->Plugin), Agent, OutputSignal, TicksToTime, OmptData};
1801+
ActionArgs.emplace_back().ProfilerArgs =
1802+
ProfilingInfoTy{&(Device->Plugin), Agent, OutputSignal, TicksToTime,
1803+
ProfilerSpecificData};
18291804
return Plugin::success();
18301805
}
18311806

@@ -2200,7 +2175,7 @@ struct AMDGPUStreamTy {
22002175
uint32_t NumThreads[3], uint32_t NumBlocks[3],
22012176
uint32_t GroupSize, uint32_t StackSize,
22022177
AMDGPUMemoryManagerTy &MemoryManager,
2203-
ompt::OmptEventInfoTy *OmptInfo = nullptr) {
2178+
void *ProfilerSpecificData = nullptr) {
22042179
if (Queue == nullptr)
22052180
return Plugin::error(ErrorCode::INVALID_NULL_POINTER,
22062181
"target queue was nullptr");
@@ -2223,13 +2198,12 @@ struct AMDGPUStreamTy {
22232198

22242199
// TODO: Technically this conditional compilation is not needed anymore
22252200
#ifdef OMPT_SUPPORT
2226-
if (OmptInfo) {
2227-
DP("OMPT-Async: Info in KernelTy >> TR ptr: %p\n", OmptInfo->TraceRecord);
2201+
if (ProfilerSpecificData) {
22282202

2229-
// OmptInfo holds function pointer to finish trace record once the kernel
2230-
// completed.
2203+
// ProfilerSpecificData holds function pointer to finish trace record once
2204+
// the kernel completed.
22312205
if (auto Err = Slots[Curr].schedProfilerKernelTiming(
2232-
&Device, Agent, OutputSignal, TicksToTime, OmptInfo))
2206+
&Device, Agent, OutputSignal, TicksToTime, ProfilerSpecificData))
22332207
return Err;
22342208
}
22352209
#endif
@@ -2302,7 +2276,7 @@ struct AMDGPUStreamTy {
23022276

23032277
/// Push an asynchronous memory copy between pinned memory buffers.
23042278
Error pushPinnedMemoryCopyAsync(void *Dst, const void *Src, uint64_t CopySize,
2305-
ompt::OmptEventInfoTy *OmptInfo = nullptr) {
2279+
void *ProfilerSpecificData = nullptr) {
23062280
// Retrieve an available signal for the operation's output.
23072281
AMDGPUSignalTy *OutputSignal = nullptr;
23082282
if (auto Err = SignalManager.getResource(OutputSignal))
@@ -2317,11 +2291,10 @@ struct AMDGPUStreamTy {
23172291

23182292
// TODO: Technically this conditional compilation is not needed anymore
23192293
#ifdef OMPT_SUPPORT
2320-
if (OmptInfo) {
2321-
DP("OMPT-Async: Registering data timing in pushPinnedMemoryCopyAsync\n");
2294+
if (ProfilerSpecificData) {
23222295
// Capture the time the data transfer required for the d2h transfer.
23232296
if (auto Err = Slots[Curr].schedProfilerDataTransferTiming(
2324-
&Device, Agent, OutputSignal, TicksToTime, OmptInfo))
2297+
&Device, Agent, OutputSignal, TicksToTime, ProfilerSpecificData))
23252298
return Err;
23262299
}
23272300
#endif
@@ -2348,7 +2321,7 @@ struct AMDGPUStreamTy {
23482321
Error pushMemoryCopyD2HAsync(void *Dst, const void *Src, void *Inter,
23492322
uint64_t CopySize,
23502323
AMDGPUMemoryManagerTy &MemoryManager,
2351-
ompt::OmptEventInfoTy *OmptInfo = nullptr) {
2324+
void *ProfilerSpecificData = nullptr) {
23522325
// Retrieve available signals for the operation's outputs.
23532326
AMDGPUSignalTy *OutputSignals[2] = {};
23542327
if (auto Err = SignalManager.getResources(/*Num=*/2, OutputSignals))
@@ -2374,11 +2347,11 @@ struct AMDGPUStreamTy {
23742347

23752348
// TODO: Technically this conditional compilation is not needed anymore
23762349
#ifdef OMPT_SUPPORT
2377-
if (OmptInfo) {
2378-
DP("OMPT-Async: Registering data timing in pushMemoryCopyD2HAsync\n");
2350+
if (ProfilerSpecificData) {
23792351
// Capture the time the data transfer required for the d2h transfer.
23802352
if (auto Err = Slots[Curr].schedProfilerDataTransferTiming(
2381-
&Device, Agent, OutputSignals[0], TicksToTime, OmptInfo))
2353+
&Device, Agent, OutputSignals[0], TicksToTime,
2354+
ProfilerSpecificData))
23822355
return Err;
23832356
}
23842357
#endif
@@ -2435,7 +2408,7 @@ struct AMDGPUStreamTy {
24352408
Error pushMemoryCopyH2DAsync(void *Dst, const void *Src, void *Inter,
24362409
uint64_t CopySize,
24372410
AMDGPUMemoryManagerTy &MemoryManager,
2438-
ompt::OmptEventInfoTy *OmptInfo = nullptr,
2411+
void *ProfilerSpecificData = nullptr,
24392412
size_t NumTimes = 1) {
24402413
// Retrieve available signals for the operation's outputs.
24412414
AMDGPUSignalTy *OutputSignals[2] = {};
@@ -2498,11 +2471,11 @@ struct AMDGPUStreamTy {
24982471

24992472
// TODO: Technically, this conditional compilation is not needed anymore
25002473
#ifdef OMPT_SUPPORT
2501-
if (OmptInfo) {
2502-
DP("OMPT-Async: Registering data timing in pushMemoryCopyH2DAsync\n");
2474+
if (ProfilerSpecificData) {
25032475
// Capture the time the data transfer required for the d2h transfer.
25042476
if (auto Err = Slots[Curr].schedProfilerDataTransferTiming(
2505-
&Device, Agent, OutputSignals[0], TicksToTime, OmptInfo))
2477+
&Device, Agent, OutputSignals[0], TicksToTime,
2478+
ProfilerSpecificData))
25062479
return Err;
25072480
}
25082481
#endif
@@ -2523,7 +2496,7 @@ struct AMDGPUStreamTy {
25232496
// AMDGPUDeviceTy is incomplete here, passing the underlying agent instead
25242497
Error pushMemoryCopyD2DAsync(void *Dst, hsa_agent_t DstAgent, const void *Src,
25252498
hsa_agent_t SrcAgent, uint64_t CopySize,
2526-
ompt::OmptEventInfoTy *OmptInfo = nullptr) {
2499+
void *ProfilerSpecificData = nullptr) {
25272500
AMDGPUSignalTy *OutputSignal;
25282501
if (auto Err = SignalManager.getResources(/*Num=*/1, &OutputSignal))
25292502
return Err;
@@ -2537,11 +2510,10 @@ struct AMDGPUStreamTy {
25372510

25382511
// TODO: Technically, this conditional compilation is not needed anymore
25392512
#ifdef OMPT_SUPPORT
2540-
if (OmptInfo) {
2541-
DP("OMPT-Async: Registering data timing in pushMemoryCopyD2DAsync\n");
2513+
if (ProfilerSpecificData) {
25422514
// Capture the time the data transfer required for the d2h transfer.
25432515
if (auto Err = Slots[Curr].schedProfilerDataTransferTiming(
2544-
&Device, Agent, OutputSignal, TicksToTime, OmptInfo))
2516+
&Device, Agent, OutputSignal, TicksToTime, ProfilerSpecificData))
25452517
return Err;
25462518
}
25472519
#endif
@@ -3746,8 +3718,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
37463718
void *PinnedPtr = nullptr;
37473719

37483720
// Obtain the OMPT-related callback data
3749-
DP("OMPT-Async: dataSubmitImpl\n");
3750-
auto LocalOmptEventInfo = getOrNullOmptEventInfo(AsyncInfoWrapper);
3721+
auto ProfilerSpecificData = getOrNullProfilerSpecificData(AsyncInfoWrapper);
37513722

37523723
// Prefault GPU page table in XNACK-Enabled case, on APUs,
37533724
// under the assumption that explicitly allocated memory
@@ -3763,9 +3734,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
37633734
PinnedAllocs.getDeviceAccessiblePtrFromPinnedBuffer(HstPtr)) {
37643735
if (auto Err = getStream(AsyncInfoWrapper, Stream))
37653736
return Err;
3766-
DP("OMPT-Async: Pinned Copy\n");
37673737
return Stream->pushPinnedMemoryCopyAsync(TgtPtr, PinnedPtr, Size,
3768-
LocalOmptEventInfo);
3738+
ProfilerSpecificData);
37693739
}
37703740

37713741
// For large transfers use synchronous behavior.
@@ -3793,7 +3763,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
37933763
if (auto Err = Signal.init())
37943764
return Err;
37953765

3796-
DP("OMPT-Async: Sync Copy\n");
37973766
if (auto Err = hsa_utils::asyncMemCopy(useMultipleSdmaEngines(), TgtPtr,
37983767
Agent, PinnedPtr, Agent, Size, 0,
37993768
nullptr, Signal.get()))
@@ -3803,9 +3772,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
38033772
return Err;
38043773

38053774
#ifdef OMPT_SUPPORT
3806-
if (LocalOmptEventInfo) {
3775+
if (Plugin.getProfiler()->isProfilingEnabled()) {
38073776
ProfilingInfoTy OmptKernelTimingArgsAsync{
3808-
&Plugin, Agent, &Signal, TicksToTime, LocalOmptEventInfo};
3777+
&Plugin, Agent, &Signal, TicksToTime, ProfilerSpecificData};
38093778

38103779
if (auto Err = timeDataTransferInNsAsync(&OmptKernelTimingArgsAsync))
38113780
return Err;
@@ -3828,10 +3797,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
38283797
if (auto Err = getStream(AsyncInfoWrapper, Stream))
38293798
return Err;
38303799

3831-
DP("OMPT-Async: ASync Copy\n");
38323800
return Stream->pushMemoryCopyH2DAsync(TgtPtr, HstPtr, PinnedPtr, Size,
38333801
PinnedMemoryManager,
3834-
LocalOmptEventInfo);
3802+
ProfilerSpecificData);
38353803
}
38363804

38373805
/// Retrieve data from the device (device to host transfer).
@@ -3841,8 +3809,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
38413809
void *PinnedPtr = nullptr;
38423810

38433811
// Obtain the OMPT-related callback data
3844-
DP("OMPT-Async: dataRetrieveImpl\n");
3845-
auto LocalOmptEventInfo = getOrNullOmptEventInfo(AsyncInfoWrapper);
3812+
auto ProfilerSpecificData = getOrNullProfilerSpecificData(AsyncInfoWrapper);
38463813

38473814
// Prefault GPU page table in XNACK-Enabled case, on APUs,
38483815
// under the assumption that explicitly allocated memory
@@ -3858,9 +3825,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
38583825
PinnedAllocs.getDeviceAccessiblePtrFromPinnedBuffer(HstPtr)) {
38593826
if (auto Err = getStream(AsyncInfoWrapper, Stream))
38603827
return Err;
3861-
DP("OMPT-Async: Pinned Copy\n");
38623828
return Stream->pushPinnedMemoryCopyAsync(PinnedPtr, TgtPtr, Size,
3863-
LocalOmptEventInfo);
3829+
ProfilerSpecificData);
38643830
}
38653831

38663832
// For large transfers use synchronous behavior.
@@ -3898,9 +3864,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
38983864
return Err;
38993865

39003866
#ifdef OMPT_SUPPORT
3901-
if (LocalOmptEventInfo) {
3867+
if (Plugin.getProfiler()->isProfilingEnabled()) {
39023868
ProfilingInfoTy OmptKernelTimingArgsAsync{
3903-
&Plugin, Agent, &Signal, TicksToTime, LocalOmptEventInfo};
3869+
&Plugin, Agent, &Signal, TicksToTime, ProfilerSpecificData};
39043870

39053871
if (auto Err = timeDataTransferInNsAsync(&OmptKernelTimingArgsAsync))
39063872
return Err;
@@ -3925,7 +3891,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
39253891

39263892
return Stream->pushMemoryCopyD2HAsync(HstPtr, TgtPtr, PinnedPtr, Size,
39273893
PinnedMemoryManager,
3928-
LocalOmptEventInfo);
3894+
ProfilerSpecificData);
39293895
}
39303896

39313897
/// Exchange data between two devices within the plugin.
@@ -3934,8 +3900,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
39343900
AsyncInfoWrapperTy &AsyncInfoWrapper) override {
39353901
AMDGPUDeviceTy &DstDevice = static_cast<AMDGPUDeviceTy &>(DstGenericDevice);
39363902

3937-
DP("OMPT-Async: dataExchangeImpl\n");
3938-
auto LocalOmptEventInfo = getOrNullOmptEventInfo(AsyncInfoWrapper);
3903+
auto ProfilerSpecificData = getOrNullProfilerSpecificData(AsyncInfoWrapper);
39393904

39403905
// For large transfers use synchronous behavior.
39413906
// If OMPT is enabled or synchronous behavior is explicitly requested:
@@ -3957,9 +3922,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
39573922
return Err;
39583923

39593924
#ifdef OMPT_SUPPORT
3960-
if (LocalOmptEventInfo) {
3925+
if (Plugin.getProfiler()->isProfilingEnabled()) {
39613926
ProfilingInfoTy OmptKernelTimingArgsAsync{
3962-
&Plugin, Agent, &Signal, TicksToTime, LocalOmptEventInfo};
3927+
&Plugin, Agent, &Signal, TicksToTime, ProfilerSpecificData};
39633928

39643929
if (auto Err = timeDataTransferInNsAsync(&OmptKernelTimingArgsAsync))
39653930
return Err;
@@ -3977,7 +3942,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
39773942

39783943
return Stream->pushMemoryCopyD2DAsync(DstPtr, DstDevice.getAgent(), SrcPtr,
39793944
getAgent(), (uint64_t)Size,
3980-
LocalOmptEventInfo);
3945+
ProfilerSpecificData);
39813946
}
39823947

39833948
/// Insert a data fence between previous data operations and the following
@@ -5601,12 +5566,12 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
56015566
}
56025567

56035568
// Get required OMPT-related data
5604-
auto LocalOmptEventInfo = getOrNullOmptEventInfo(AsyncInfoWrapper);
5569+
auto ProfilerSpecificData = getOrNullProfilerSpecificData(AsyncInfoWrapper);
56055570

56065571
// Push the kernel launch into the stream.
56075572
return Stream->pushKernelLaunch(*this, AllArgs, NumThreads, NumBlocks,
56085573
GroupSize, static_cast<uint32_t>(StackSize),
5609-
ArgsMemoryManager, LocalOmptEventInfo);
5574+
ArgsMemoryManager, ProfilerSpecificData);
56105575
}
56115576

56125577
void AMDGPUKernelTy::printAMDOneLineKernelTrace(GenericDeviceTy &GenericDevice,
@@ -5847,8 +5812,8 @@ static ProfilingInfoTy *getProfilingInfo(void *Data) {
58475812

58485813
static std::pair<uint64_t, uint64_t>
58495814
getKernelStartAndEndTime(const ProfilingInfoTy *Args) {
5850-
assert(Args->Plugin && "Invalid GenericPlugin pointer in OMPT profiling");
5851-
assert(Args->Signal && "Invalid AMDGPUSignal pointer in OMPT profiling");
5815+
assert(Args->Plugin && "Invalid GenericPlugin pointer in profiling");
5816+
assert(Args->Signal && "Invalid AMDGPUSignal pointer in profiling");
58525817

58535818
hsa_amd_profiling_dispatch_time_t TimeRec{0, 0};
58545819
hsa_status_t Status = hsa_amd_profiling_get_dispatch_time(
@@ -5872,7 +5837,7 @@ getKernelStartAndEndTime(const ProfilingInfoTy *Args) {
58725837

58735838
static std::pair<uint64_t, uint64_t>
58745839
getCopyStartAndEndTime(const ProfilingInfoTy *Args) {
5875-
assert(Args->Signal && "Invalid AMDGPUSignal Pointer in OMPT profiling");
5840+
assert(Args->Signal && "Invalid AMDGPUSignal Pointer in profiling");
58765841

58775842
hsa_amd_profiling_async_copy_time_t TimeRec{0, 0};
58785843
hsa_status_t Status =

0 commit comments

Comments
 (0)