Skip to content

Commit a852d97

Browse files
authored
[OMPT] Avoid unused allocations & code clean up (#378)
This moves the allocation for ProfilerSpecificData into the TraceGeneratorRAII object, thus, it only allocates memory for events that are actually traced. This is the result of @dhruvachak comment on my previous OMPT PR. While we weren't leaking memory, we would do extra allocations which would accumulate and not be freed. This patch addresses that issue. The allocation is now done inside `TraceInterfaceRAII` in Interface.h when the event has _not_ been filtered. Tested on single and multi GPU systems. The way that the higher-level (Interface.h / device.cpp` talks to the lower-level (plugins) is unchanged through this PR and I'm still looking at it / thinking about it. It also does a bit of NFCI code clean up that can be split out of this commit if desired.
1 parent b06c892 commit a852d97

File tree

5 files changed

+71
-83
lines changed

5 files changed

+71
-83
lines changed

offload/include/OpenMP/OMPT/Interface.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
#include "Shared/Debug.h"
2222
#include "omp-tools.h"
2323

24+
#include "GenericProfiler.h"
25+
2426
#include "llvm/Support/ErrorHandling.h"
2527

2628
#include <functional>
@@ -473,16 +475,20 @@ template <typename FunctionPairTy, typename AsyncInfoTy, typename... ArgsTy>
473475
class TracerInterfaceRAII {
474476
public:
475477
TracerInterfaceRAII(FunctionPairTy Callbacks, AsyncInfoTy &AsyncInfo,
476-
void *OmptSpecificData, int TracedDeviceId,
478+
plugin::GenericProfilerTy *Prof, int TracedDeviceId,
477479
ompt_callbacks_t EventType, ArgsTy... Args)
478480
: Arguments(Args...), beginFunction(std::get<0>(Callbacks)) {
479481
__tgt_async_info *AI = AsyncInfo;
480482
if (isTracingEnabled(TracedDeviceId, EventType)) {
481483
auto Record = begin();
482484

483-
// Access the already allocated profiler data and populate it
485+
// The Profiler can allocate specific data to be used to pass information
486+
// from here to lower parts of the runtime system.
487+
// NOTE: It is the responsibility of the programmer to ensure type
488+
// compatibility and correct usage of the data. The profiler, however,
489+
// OWNS the pointer and frees it at an appropriate time.
484490
OmptEventInfoTy *ProfilerData =
485-
reinterpret_cast<OmptEventInfoTy *>(OmptSpecificData);
491+
reinterpret_cast<OmptEventInfoTy *>(Prof->getProfilerSpecificData());
486492
ProfilerData->TraceRecord = Record;
487493
ProfilerData->NumTeams = 0;
488494

offload/libomptarget/device.cpp

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -245,18 +245,12 @@ int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
245245
RegionInterface.getCallbacks<ompt_target_data_transfer_to_device>(),
246246
omp_get_initial_device(), HstPtrBegin, DeviceID, TgtPtrBegin, Size,
247247
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);
248-
// The Profiler can allocate specific data to be used to pass information
249-
// from here to lower parts of the runtime system.
250-
// NOTE: It is the responsibility of the programmer to ensure type
251-
// compatibility and correct usage of the data. The profiler, however,
252-
// OWNS the pointer and frees it at an appropriate time.
253-
void *ProfData = RTL->getProfiler()->getProfilerSpecificData();
254248
// Only if 'TracedDeviceId' is actually traced, AsyncInfo->OmptEventInfo
255249
// is set and a trace record generated. Otherwise: No OMPT device tracing.
256250
TracerInterfaceRAII TargetDataSubmitTraceRAII(
257251
RegionInterface
258252
.getTraceGenerators<ompt_target_data_transfer_to_device>(),
259-
AsyncInfo, ProfData, /*TracedDeviceId=*/DeviceID,
253+
AsyncInfo, RTL->getProfiler(), /*TracedDeviceId=*/DeviceID,
260254
/*EventType=*/ompt_callback_target_data_op, omp_get_initial_device(),
261255
HstPtrBegin, DeviceID, TgtPtrBegin, Size,
262256
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
@@ -281,18 +275,12 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
281275
RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
282276
DeviceID, TgtPtrBegin, omp_get_initial_device(), HstPtrBegin, Size,
283277
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);
284-
// The Profiler can allocate specific data to be used to pass information
285-
// from here to lower parts of the runtime system.
286-
// NOTE: It is the responsibility of the programmer to ensure type
287-
// compatibility and correct usage of the data. The profiler, however,
288-
// OWNS the pointer and frees it at an appropriate time.
289-
void *ProfData = RTL->getProfiler()->getProfilerSpecificData();
290278
// Only if 'TracedDeviceId' is actually traced, AsyncInfo->OmptEventInfo
291279
// is set and a trace record generated. Otherwise: No OMPT device tracing.
292280
TracerInterfaceRAII TargetDataSubmitTraceRAII(
293281
RegionInterface
294282
.getTraceGenerators<ompt_target_data_transfer_from_device>(),
295-
AsyncInfo, ProfData, /*TracedDeviceId=*/DeviceID,
283+
AsyncInfo, RTL->getProfiler(), /*TracedDeviceId=*/DeviceID,
296284
/*EventType=*/ompt_callback_target_data_op, DeviceID, TgtPtrBegin,
297285
omp_get_initial_device(), HstPtrBegin, Size,
298286
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
@@ -316,18 +304,12 @@ int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
316304
RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
317305
RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, Size,
318306
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);
319-
// The Profiler can allocate specific data to be used to pass information
320-
// from here to lower parts of the runtime system.
321-
// NOTE: It is the responsibility of the programmer to ensure type
322-
// compatibility and correct usage of the data. The profiler, however,
323-
// OWNS the pointer and frees it at an appropriate time.
324-
void *ProfData = RTL->getProfiler()->getProfilerSpecificData();
325307
// Only if 'TracedDeviceId' is actually traced, AsyncInfo->OmptEventInfo
326308
// is set and a trace record generated. Otherwise: No OMPT device tracing.
327309
TracerInterfaceRAII TargetDataExchangeTraceRAII(
328310
RegionInterface
329311
.getTraceGenerators<ompt_target_data_transfer_from_device>(),
330-
AsyncInfo, ProfData, /*TracedDeviceId=*/RTLDeviceID,
312+
AsyncInfo, RTL->getProfiler(), /*TracedDeviceId=*/RTLDeviceID,
331313
/*EventType=*/ompt_callback_target_data_op, RTLDeviceID, SrcPtr,
332314
DstDev.RTLDeviceID, DstPtr, Size,
333315
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)

offload/libomptarget/omptarget.cpp

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2101,13 +2101,6 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
21012101
InterfaceRAII TargetSubmitRAII(
21022102
RegionInterface.getCallbacks<ompt_callback_target_submit>(), NumTeams);
21032103

2104-
// The Profiler can allocate specific data to be used to pass information
2105-
// from here to lower parts of the runtime system.
2106-
// NOTE: It is the responsibility of the programmer to ensure type
2107-
// compatibility and correct usage of the data. The profiler, however, OWNS
2108-
// the pointer and frees it at an appropriate time.
2109-
void *ProfData = Device.RTL->getProfiler()->getProfilerSpecificData();
2110-
21112104
// Calls "begin" for the OMPT trace record and let the plugin
21122105
// enqueue the stop operation for after the kernel is done. The stop
21132106
// operation completes the trace record entry with the information from
@@ -2116,7 +2109,7 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
21162109
// set and a trace record generated. Otherwise: No OMPT device tracing.
21172110
TracerInterfaceRAII TargetTraceRAII(
21182111
RegionInterface.getTraceGenerators<ompt_callback_target_submit>(),
2119-
AsyncInfo, ProfData, /*TracedDeviceId=*/DeviceId,
2112+
AsyncInfo, Device.RTL->getProfiler(), /*TracedDeviceId=*/DeviceId,
21202113
/*EventType=*/ompt_callback_target_submit, DeviceId, NumTeams);
21212114
#endif
21222115
Ret = Device.launchKernel(TgtEntryPtr, TgtArgs.data(), TgtOffsets.data(),

offload/plugins-nextgen/common/OMPT/OmptProfiler.cpp

Lines changed: 58 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,14 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "OmptProfiler.h"
14+
#include "OpenMP/OMPT/Interface.h"
1415
#include "PluginInterface.h"
1516
#include "Shared/Debug.h"
1617

17-
void llvm::omp::target::ompt::OmptProfilerTy::handleInit(
18-
llvm::omp::target::plugin::GenericDeviceTy *Device,
19-
llvm::omp::target::plugin::GenericPluginTy *Plugin) {
18+
using namespace llvm::omp::target;
19+
20+
void ompt::OmptProfilerTy::handleInit(plugin::GenericDeviceTy *Device,
21+
plugin::GenericPluginTy *Plugin) {
2022
auto DeviceId = Device->getDeviceId();
2123
auto DevicePtr = reinterpret_cast<ompt_device_t *>(Device);
2224
ompt::setDeviceId(DevicePtr, Plugin->getUserId(DeviceId));
@@ -32,9 +34,8 @@ void llvm::omp::target::ompt::OmptProfilerTy::handleInit(
3234
}
3335
}
3436

35-
void llvm::omp::target::ompt::OmptProfilerTy::handleDeinit(
36-
llvm::omp::target::plugin::GenericDeviceTy *Device,
37-
llvm::omp::target::plugin::GenericPluginTy *Plugin) {
37+
void ompt::OmptProfilerTy::handleDeinit(
38+
plugin::GenericDeviceTy *Device, target::plugin::GenericPluginTy *Plugin) {
3839
auto DeviceId = Device->getDeviceId();
3940

4041
if (ompt::Initialized) {
@@ -45,50 +46,55 @@ void llvm::omp::target::ompt::OmptProfilerTy::handleDeinit(
4546
ompt::removeDeviceId(reinterpret_cast<ompt_device_t *>(Device));
4647
}
4748

48-
void llvm::omp::target::ompt::OmptProfilerTy::handleLoadBinary(
49-
llvm::omp::target::plugin::GenericDeviceTy *Device,
50-
llvm::omp::target::plugin::GenericPluginTy *Plugin,
51-
const StringRef InputTgtImage) {
52-
auto DeviceId = Device->getDeviceId();
49+
void ompt::OmptProfilerTy::handleLoadBinary(plugin::GenericDeviceTy *Device,
50+
plugin::GenericPluginTy *Plugin,
51+
const StringRef InputTgtImage) {
5352

54-
if (ompt::Initialized) {
55-
size_t Bytes = InputTgtImage.size();
56-
performOmptCallback(
57-
device_load, Plugin->getUserId(DeviceId),
58-
/*FileName=*/nullptr, /*FileOffset=*/0, /*VmaInFile=*/nullptr,
59-
/*ImgSize=*/Bytes,
60-
/*HostAddr=*/const_cast<unsigned char *>(InputTgtImage.bytes_begin()),
61-
/*DeviceAddr=*/nullptr, /* FIXME: ModuleId */ 0);
62-
}
53+
if (!ompt::Initialized)
54+
return;
55+
56+
auto DeviceId = Device->getDeviceId();
57+
size_t Bytes = InputTgtImage.size();
58+
performOmptCallback(
59+
device_load, Plugin->getUserId(DeviceId),
60+
/*FileName=*/nullptr, /*FileOffset=*/0, /*VmaInFile=*/nullptr,
61+
/*ImgSize=*/Bytes,
62+
/*HostAddr=*/const_cast<unsigned char *>(InputTgtImage.bytes_begin()),
63+
/*DeviceAddr=*/nullptr, /* FIXME: ModuleId */ 0);
6364
}
6465

65-
void llvm::omp::target::ompt::OmptProfilerTy::handleDataAlloc(
66-
uint64_t StartNanos, uint64_t EndNanos, void *HostPtr, uint64_t Size,
67-
void *Data) {
66+
void ompt::OmptProfilerTy::handleDataAlloc(uint64_t StartNanos,
67+
uint64_t EndNanos, void *HostPtr,
68+
uint64_t Size, void *Data) {
6869
ompt::setOmptTimestamp(StartNanos, EndNanos);
6970
}
7071

71-
void llvm::omp::target::ompt::OmptProfilerTy::handleDataDelete(
72-
uint64_t StartNanos, uint64_t EndNanos, void *TgtPtr, void *Data) {
72+
void ompt::OmptProfilerTy::handleDataDelete(uint64_t StartNanos,
73+
uint64_t EndNanos, void *TgtPtr,
74+
void *Data) {
7375
ompt::setOmptTimestamp(StartNanos, EndNanos);
7476
}
7577

76-
void llvm::omp::target::ompt::OmptProfilerTy::handlePreKernelLaunch(
77-
llvm::omp::target::plugin::GenericDeviceTy *Device, uint32_t NumBlocks[3],
78+
void ompt::OmptProfilerTy::handlePreKernelLaunch(
79+
plugin::GenericDeviceTy *Device, uint32_t NumBlocks[3],
7880
__tgt_async_info *AI) {
79-
OMPT_IF_TRACING_ENABLED(
80-
if (llvm::omp::target::ompt::isTracedDevice(getDeviceId(Device))) {
81-
if (AI->ProfilerData != nullptr) {
82-
auto ProfilerSpecificData = reinterpret_cast<ompt::OmptEventInfoTy *>(AI->ProfilerData);
83-
// Set number of granted teams for OMPT
84-
setOmptGrantedNumTeams(NumBlocks[0]);
85-
ProfilerSpecificData->NumTeams = NumBlocks[0];
86-
}
87-
});
81+
if (!ompt::isTracedDevice(getDeviceId(Device)))
82+
return;
83+
84+
if (AI->ProfilerData == nullptr)
85+
return;
86+
87+
auto ProfilerSpecificData =
88+
reinterpret_cast<ompt::OmptEventInfoTy *>(AI->ProfilerData);
89+
assert(ProfilerSpecificData && "Invalid ProfilerSpecificData");
90+
// Set number of granted teams for OMPT
91+
setOmptGrantedNumTeams(NumBlocks[0]);
92+
ProfilerSpecificData->NumTeams = NumBlocks[0];
8893
}
8994

90-
void llvm::omp::target::ompt::OmptProfilerTy::handleKernelCompletion(
91-
uint64_t StartNanos, uint64_t EndNanos, void *Data) {
95+
void ompt::OmptProfilerTy::handleKernelCompletion(uint64_t StartNanos,
96+
uint64_t EndNanos,
97+
void *Data) {
9298

9399
if (!isProfilingEnabled())
94100
return;
@@ -98,24 +104,24 @@ void llvm::omp::target::ompt::OmptProfilerTy::handleKernelCompletion(
98104
if (!Data)
99105
return;
100106

101-
DP("OMPT-Async: Time kernel for asynchronous execution (Plugin): Start %lu "
107+
DP("OMPT-Async: Time kernel for asynchronous execution: Start %lu "
102108
"End %lu\n",
103109
StartNanos, EndNanos);
104110

105111
auto OmptEventInfo = reinterpret_cast<ompt::OmptEventInfoTy *>(Data);
106112
assert(OmptEventInfo && "Invalid OmptEventInfo");
107113
assert(OmptEventInfo->TraceRecord && "Invalid TraceRecord");
108114

109-
llvm::omp::target::ompt::RegionInterface.stopTargetSubmitTraceAsync(
110-
OmptEventInfo->TraceRecord, OmptEventInfo->NumTeams, StartNanos,
111-
EndNanos);
115+
ompt::RegionInterface.stopTargetSubmitTraceAsync(OmptEventInfo->TraceRecord,
116+
OmptEventInfo->NumTeams,
117+
StartNanos, EndNanos);
112118

113119
// Done processing, our responsibility to free the memory
114120
freeProfilerDataEntry(OmptEventInfo);
115121
}
116122

117-
void llvm::omp::target::ompt::OmptProfilerTy::handleDataTransfer(
118-
uint64_t StartNanos, uint64_t EndNanos, void *Data) {
123+
void ompt::OmptProfilerTy::handleDataTransfer(uint64_t StartNanos,
124+
uint64_t EndNanos, void *Data) {
119125

120126
if (!isProfilingEnabled())
121127
return;
@@ -125,23 +131,25 @@ void llvm::omp::target::ompt::OmptProfilerTy::handleDataTransfer(
125131
if (!Data)
126132
return;
127133

134+
DP("OMPT-Async: Time data for asynchronous execution: Start %lu "
135+
"End %lu\n",
136+
StartNanos, EndNanos);
137+
128138
auto OmptEventInfo = reinterpret_cast<ompt::OmptEventInfoTy *>(Data);
129139
assert(OmptEventInfo && "Invalid OmptEventInfo");
130140
assert(OmptEventInfo->TraceRecord && "Invalid TraceRecord");
131141

132-
llvm::omp::target::ompt::RegionInterface.stopTargetDataMovementTraceAsync(
142+
ompt::RegionInterface.stopTargetDataMovementTraceAsync(
133143
OmptEventInfo->TraceRecord, StartNanos, EndNanos);
134144

135145
// Done processing, our responsibility to free the memory
136146
freeProfilerDataEntry(OmptEventInfo);
137147
}
138148

139-
bool llvm::omp::target::ompt::OmptProfilerTy::isProfilingEnabled() {
140-
return llvm::omp::target::ompt::TracingActive;
141-
}
149+
bool ompt::OmptProfilerTy::isProfilingEnabled() { return ompt::TracingActive; }
142150

143-
void llvm::omp::target::ompt::OmptProfilerTy::setTimeConversionFactorsImpl(
144-
double Slope, double Offset) {
151+
void ompt::OmptProfilerTy::setTimeConversionFactorsImpl(double Slope,
152+
double Offset) {
145153
DP("Using Time Slope: %f and Offset: %f \n", Slope, Offset);
146154
setOmptHostToDeviceRate(Slope, Offset);
147155
}

offload/plugins-nextgen/common/OMPT/OmptProfiler.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919

2020
#include "OmptDeviceTracing.h"
2121
#include "OpenMP/OMPT/Callback.h"
22-
#include "OpenMP/OMPT/Interface.h"
2322
#include "Shared/Debug.h"
2423
#include "omp-tools.h"
2524

0 commit comments

Comments
 (0)