Skip to content

Commit 7e68c9e

Browse files
mhalktru
authored and committed
[OpenMP] [OMPT] [7/8] Invoke tool-supplied callbacks before and after target launch and data transfer operations
Implemented RAII objects, initialized at target entry points, that invoke tool-supplied callbacks. Updated status of target callbacks as implemented. Depends on D127365.

Patch from John Mellor-Crummey <[email protected]>
With contributions from:
Dhruva Chakrabarti <[email protected]>
Jan-Patrick Lehr <[email protected]>

Reviewed By: jdoerfert, dhruvachak, jplehr

Differential Revision: https://reviews.llvm.org/D127367

(cherry picked from commit 1dec417)
1 parent ce53e91 commit 7e68c9e

14 files changed

+473
-88
lines changed

openmp/libomptarget/include/OmptCallback.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,13 @@
2727
FOREACH_OMPT_NOEMI_EVENT(macro) \
2828
FOREACH_OMPT_EMI_EVENT(macro)
2929

30+
/// Execute \p stmt only if llvm::omp::target::ompt::Initialized is true.
/// The body is wrapped in do { ... } while (0) so that the expansion acts as
/// a single statement at the use site.
#define performIfOmptInitialized(stmt) \
31+
do { \
32+
if (llvm::omp::target::ompt::Initialized) { \
33+
stmt; \
34+
} \
35+
} while (0)
36+
3037
#define performOmptCallback(CallbackName, ...) \
3138
do { \
3239
if (ompt_callback_##CallbackName##_fn) \
@@ -89,6 +96,8 @@ extern bool Initialized;
8996
} // namespace omp
9097
} // namespace llvm
9198

99+
#else
100+
#define performIfOmptInitialized(stmt)
92101
#endif // OMPT_SUPPORT
93102

94103
#pragma pop_macro("DEBUG_PREFIX")

openmp/libomptarget/src/OmptCallback.cpp

Lines changed: 54 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,20 @@ FOREACH_OMPT_NOEMI_EVENT(defineOmptCallback)
3535
FOREACH_OMPT_EMI_EVENT(defineOmptCallback)
3636
#undef defineOmptCallback
3737

38-
/// Thread local state for target region and associated metadata
39-
thread_local llvm::omp::target::ompt::Interface OmptInterface;
38+
/// Forward declaration
39+
class LibomptargetRtlFinalizer;
4040

41-
/// Define function pointers
42-
ompt_get_task_data_t ompt_get_task_data_fn = nullptr;
41+
/// Object that will maintain the RTL finalizer from the plugin
42+
LibomptargetRtlFinalizer *LibraryFinalizer = nullptr;
43+
44+
thread_local Interface llvm::omp::target::ompt::RegionInterface;
45+
46+
bool llvm::omp::target::ompt::Initialized = false;
47+
48+
ompt_get_callback_t llvm::omp::target::ompt::lookupCallbackByCode = nullptr;
49+
ompt_function_lookup_t llvm::omp::target::ompt::lookupCallbackByName = nullptr;
4350
ompt_get_target_task_data_t ompt_get_target_task_data_fn = nullptr;
51+
ompt_get_task_data_t ompt_get_task_data_fn = nullptr;
4452

4553
/// Unique correlation id
4654
static std::atomic<uint64_t> IdCounter(1);
@@ -51,14 +59,14 @@ static uint64_t createId() { return IdCounter.fetch_add(1); }
5159
/// Create a new correlation id and update the operations id
5260
static uint64_t createOpId() {
5361
uint64_t NewId = createId();
54-
OmptInterface.setHostOpId(NewId);
62+
RegionInterface.setHostOpId(NewId);
5563
return NewId;
5664
}
5765

5866
/// Create a new correlation id and update the target region id
5967
static uint64_t createRegionId() {
6068
uint64_t NewId = createId();
61-
OmptInterface.setTargetDataValue(NewId);
69+
RegionInterface.setTargetDataValue(NewId);
6270
return NewId;
6371
}
6472

@@ -68,18 +76,19 @@ void Interface::beginTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin,
6876
if (ompt_callback_target_data_op_emi_fn) {
6977
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
7078
// callback
71-
ompt_callback_target_data_op_emi_fn(ompt_scope_begin, TargetTaskData,
72-
&TargetData, &TargetRegionOpId,
73-
ompt_target_data_alloc, HstPtrBegin,
74-
DeviceId, /* TgtPtrBegin */ nullptr,
75-
/* TgtDeviceNum */ 0, Size, Code);
79+
ompt_callback_target_data_op_emi_fn(
80+
ompt_scope_begin, TargetTaskData, &TargetData, &TargetRegionOpId,
81+
ompt_target_data_alloc, HstPtrBegin,
82+
/* SrcDeviceNum */ omp_get_initial_device(), /* TgtPtrBegin */ nullptr,
83+
/* TgtDeviceNum */ DeviceId, Size, Code);
7684
} else if (ompt_callback_target_data_op_fn) {
7785
// HostOpId is set by the runtime
7886
HostOpId = createOpId();
7987
// Invoke the tool supplied data op callback
8088
ompt_callback_target_data_op_fn(
8189
TargetData.value, HostOpId, ompt_target_data_alloc, HstPtrBegin,
82-
DeviceId, /* TgtPtrBegin */ nullptr, /* TgtDeviceNum */ 0, Size, Code);
90+
/* SrcDeviceNum */ omp_get_initial_device(), /* TgtPtrBegin */ nullptr,
91+
/* TgtDeviceNum */ DeviceId, Size, Code);
8392
}
8493
}
8594

@@ -89,11 +98,11 @@ void Interface::endTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin,
8998
if (ompt_callback_target_data_op_emi_fn) {
9099
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
91100
// callback
92-
ompt_callback_target_data_op_emi_fn(ompt_scope_end, TargetTaskData,
93-
&TargetData, &TargetRegionOpId,
94-
ompt_target_data_alloc, HstPtrBegin,
95-
DeviceId, /* TgtPtrBegin */ nullptr,
96-
/* TgtDeviceNum */ 0, Size, Code);
101+
ompt_callback_target_data_op_emi_fn(
102+
ompt_scope_end, TargetTaskData, &TargetData, &TargetRegionOpId,
103+
ompt_target_data_alloc, HstPtrBegin,
104+
/* SrcDeviceNum */ omp_get_initial_device(), /* TgtPtrBegin */ nullptr,
105+
/* TgtDeviceNum */ DeviceId, Size, Code);
97106
}
98107
endTargetDataOperation();
99108
}
@@ -108,14 +117,16 @@ void Interface::beginTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin,
108117
ompt_callback_target_data_op_emi_fn(
109118
ompt_scope_begin, TargetTaskData, &TargetData, &TargetRegionOpId,
110119
ompt_target_data_transfer_to_device, HstPtrBegin,
111-
/* SrcDeviceNum */ 0, TgtPtrBegin, DeviceId, Size, Code);
120+
/* SrcDeviceNum */ omp_get_initial_device(), TgtPtrBegin, DeviceId,
121+
Size, Code);
112122
} else if (ompt_callback_target_data_op_fn) {
113123
// HostOpId is set by the runtime
114124
HostOpId = createOpId();
115125
// Invoke the tool supplied data op callback
116126
ompt_callback_target_data_op_fn(
117127
TargetData.value, HostOpId, ompt_target_data_transfer_to_device,
118-
HstPtrBegin, /* SrcDeviceNum */ 0, TgtPtrBegin, DeviceId, Size, Code);
128+
HstPtrBegin, /* SrcDeviceNum */ omp_get_initial_device(), TgtPtrBegin,
129+
DeviceId, Size, Code);
119130
}
120131
}
121132

@@ -129,7 +140,8 @@ void Interface::endTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin,
129140
ompt_callback_target_data_op_emi_fn(
130141
ompt_scope_end, TargetTaskData, &TargetData, &TargetRegionOpId,
131142
ompt_target_data_transfer_to_device, HstPtrBegin,
132-
/* SrcDeviceNum */ 0, TgtPtrBegin, DeviceId, Size, Code);
143+
/* SrcDeviceNum */ omp_get_initial_device(), TgtPtrBegin, DeviceId,
144+
Size, Code);
133145
}
134146
endTargetDataOperation();
135147
}
@@ -143,15 +155,15 @@ void Interface::beginTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin,
143155
ompt_callback_target_data_op_emi_fn(
144156
ompt_scope_begin, TargetTaskData, &TargetData, &TargetRegionOpId,
145157
ompt_target_data_delete, TgtPtrBegin, DeviceId,
146-
/* TgtPtrBegin */ nullptr, /* TgtDeviceNum */ 0, /* Bytes */ 0, Code);
158+
/* TgtPtrBegin */ nullptr, /* TgtDeviceNum */ -1, /* Bytes */ 0, Code);
147159
} else if (ompt_callback_target_data_op_fn) {
148160
// HostOpId is set by the runtime
149161
HostOpId = createOpId();
150162
// Invoke the tool supplied data op callback
151163
ompt_callback_target_data_op_fn(TargetData.value, HostOpId,
152164
ompt_target_data_delete, TgtPtrBegin,
153165
DeviceId, /* TgtPtrBegin */ nullptr,
154-
/* TgtDeviceNum */ 0, /* Bytes */ 0, Code);
166+
/* TgtDeviceNum */ -1, /* Bytes */ 0, Code);
155167
}
156168
}
157169

@@ -164,7 +176,7 @@ void Interface::endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin,
164176
ompt_callback_target_data_op_emi_fn(
165177
ompt_scope_end, TargetTaskData, &TargetData, &TargetRegionOpId,
166178
ompt_target_data_delete, TgtPtrBegin, DeviceId,
167-
/* TgtPtrBegin */ nullptr, /* TgtDeviceNum */ 0, /* Bytes */ 0, Code);
179+
/* TgtPtrBegin */ nullptr, /* TgtDeviceNum */ -1, /* Bytes */ 0, Code);
168180
}
169181
endTargetDataOperation();
170182
}
@@ -176,19 +188,19 @@ void Interface::beginTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
176188
if (ompt_callback_target_data_op_emi_fn) {
177189
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
178190
// callback
179-
ompt_callback_target_data_op_emi_fn(ompt_scope_begin, TargetTaskData,
180-
&TargetData, &TargetRegionOpId,
181-
ompt_target_data_transfer_from_device,
182-
TgtPtrBegin, DeviceId, HstPtrBegin,
183-
/* TgtDeviceNum */ 0, Size, Code);
191+
ompt_callback_target_data_op_emi_fn(
192+
ompt_scope_begin, TargetTaskData, &TargetData, &TargetRegionOpId,
193+
ompt_target_data_transfer_from_device, TgtPtrBegin, DeviceId,
194+
HstPtrBegin,
195+
/* TgtDeviceNum */ omp_get_initial_device(), Size, Code);
184196
} else if (ompt_callback_target_data_op_fn) {
185197
// HostOpId is set by the runtime
186198
HostOpId = createOpId();
187199
// Invoke the tool supplied data op callback
188-
ompt_callback_target_data_op_fn(TargetData.value, HostOpId,
189-
ompt_target_data_transfer_from_device,
190-
TgtPtrBegin, DeviceId, HstPtrBegin,
191-
/* TgtDeviceNum */ 0, Size, Code);
200+
ompt_callback_target_data_op_fn(
201+
TargetData.value, HostOpId, ompt_target_data_transfer_from_device,
202+
TgtPtrBegin, DeviceId, HstPtrBegin,
203+
/* TgtDeviceNum */ omp_get_initial_device(), Size, Code);
192204
}
193205
}
194206

@@ -199,11 +211,11 @@ void Interface::endTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
199211
if (ompt_callback_target_data_op_emi_fn) {
200212
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
201213
// callback
202-
ompt_callback_target_data_op_emi_fn(ompt_scope_end, TargetTaskData,
203-
&TargetData, &TargetRegionOpId,
204-
ompt_target_data_transfer_from_device,
205-
TgtPtrBegin, DeviceId, HstPtrBegin,
206-
/* TgtDeviceNum */ 0, Size, Code);
214+
ompt_callback_target_data_op_emi_fn(
215+
ompt_scope_end, TargetTaskData, &TargetData, &TargetRegionOpId,
216+
ompt_target_data_transfer_from_device, TgtPtrBegin, DeviceId,
217+
HstPtrBegin,
218+
/* TgtDeviceNum */ omp_get_initial_device(), Size, Code);
207219
}
208220
endTargetDataOperation();
209221
}
@@ -230,6 +242,7 @@ void Interface::endTargetSubmit(unsigned int numTeams) {
230242
numTeams);
231243
}
232244
}
245+
233246
void Interface::beginTargetDataEnter(int64_t DeviceId, void *Code) {
234247
beginTargetRegion();
235248
if (ompt_callback_target_emi_fn) {
@@ -391,14 +404,6 @@ class LibomptargetRtlFinalizer {
391404
llvm::SmallVector<ompt_finalize_t> RtlFinalizationFunctions;
392405
};
393406

394-
/// Object that will maintain the RTL finalizer from the plugin
395-
LibomptargetRtlFinalizer *LibraryFinalizer = nullptr;
396-
397-
bool llvm::omp::target::ompt::Initialized = false;
398-
399-
ompt_get_callback_t llvm::omp::target::ompt::lookupCallbackByCode = nullptr;
400-
ompt_function_lookup_t llvm::omp::target::ompt::lookupCallbackByName = nullptr;
401-
402407
int llvm::omp::target::ompt::initializeLibrary(ompt_function_lookup_t lookup,
403408
int initial_device_num,
404409
ompt_data_t *tool_data) {
@@ -418,6 +423,9 @@ int llvm::omp::target::ompt::initializeLibrary(ompt_function_lookup_t lookup,
418423

419424
assert(lookupCallbackByCode && "lookupCallbackByCode should be non-null");
420425
assert(lookupCallbackByName && "lookupCallbackByName should be non-null");
426+
assert(ompt_get_task_data_fn && "ompt_get_task_data_fn should be non-null");
427+
assert(ompt_get_target_task_data_fn &&
428+
"ompt_get_target_task_data_fn should be non-null");
421429
assert(LibraryFinalizer == nullptr &&
422430
"LibraryFinalizer should not be initialized yet");
423431

@@ -434,6 +442,7 @@ void llvm::omp::target::ompt::finalizeLibrary(ompt_data_t *data) {
434442
// with this library
435443
LibraryFinalizer->finalize();
436444
delete LibraryFinalizer;
445+
Initialized = false;
437446
}
438447

439448
void llvm::omp::target::ompt::connectLibrary() {

openmp/libomptarget/src/OmptInterface.h

Lines changed: 113 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,18 @@
1313
#ifndef _OMPTARGET_OMPTINTERFACE_H
1414
#define _OMPTARGET_OMPTINTERFACE_H
1515

16+
// Only provide functionality if target OMPT support is enabled
17+
#ifdef OMPT_SUPPORT
18+
#include <functional>
19+
#include <tuple>
20+
21+
#include "OmptCallback.h"
1622
#include "omp-tools.h"
1723

18-
// If target OMPT support is compiled in
19-
#ifdef OMPT_SUPPORT
24+
#include "llvm/Support/ErrorHandling.h"
25+
2026
#define OMPT_IF_BUILT(stmt) stmt
21-
#else
22-
#define OMPT_IF_BUILT(stmt)
23-
#endif
27+
#define OMPT_GET_RETURN_ADDRESS(level) __builtin_return_address(level)
2428

2529
/// Callbacks for target regions require task_data representing the
2630
/// encountering task.
@@ -108,6 +112,66 @@ class Interface {
108112
/// Top-level function for invoking callback after target construct
109113
void endTarget(int64_t DeviceId, void *Code);
110114

115+
// Callback getter: Target data operations
// Selects, at compile time, the begin/end member functions of Interface that
// correspond to the data operation OpType and returns them as a std::pair of
// std::mem_fn wrappers (first = begin, second = end). The plain and the
// '_async' enumerator of each operation map to the same pair.
116+
template <ompt_target_data_op_t OpType> auto getCallbacks() {
117+
if constexpr (OpType == ompt_target_data_alloc ||
118+
OpType == ompt_target_data_alloc_async)
119+
return std::make_pair(std::mem_fn(&Interface::beginTargetDataAlloc),
120+
std::mem_fn(&Interface::endTargetDataAlloc));
121+
122+
if constexpr (OpType == ompt_target_data_delete ||
123+
OpType == ompt_target_data_delete_async)
124+
return std::make_pair(std::mem_fn(&Interface::beginTargetDataDelete),
125+
std::mem_fn(&Interface::endTargetDataDelete));
126+
127+
if constexpr (OpType == ompt_target_data_transfer_to_device ||
128+
OpType == ompt_target_data_transfer_to_device_async)
129+
return std::make_pair(std::mem_fn(&Interface::beginTargetDataSubmit),
130+
std::mem_fn(&Interface::endTargetDataSubmit));
131+
132+
if constexpr (OpType == ompt_target_data_transfer_from_device ||
133+
OpType == ompt_target_data_transfer_from_device_async)
134+
return std::make_pair(std::mem_fn(&Interface::beginTargetDataRetrieve),
135+
std::mem_fn(&Interface::endTargetDataRetrieve));
136+
137+
// Only reached when OpType matched none of the branches above.
llvm_unreachable("Unhandled target data operation type!");
138+
}
139+
140+
// Callback getter: Target region operations
// Selects, at compile time, the begin/end member functions of Interface that
// correspond to the target construct kind OpType and returns them as a
// std::pair of std::mem_fn wrappers (first = begin, second = end). The plain
// and the '_nowait' enumerator of each construct map to the same pair.
141+
template <ompt_target_t OpType> auto getCallbacks() {
142+
if constexpr (OpType == ompt_target_enter_data ||
143+
OpType == ompt_target_enter_data_nowait)
144+
return std::make_pair(std::mem_fn(&Interface::beginTargetDataEnter),
145+
std::mem_fn(&Interface::endTargetDataEnter));
146+
147+
if constexpr (OpType == ompt_target_exit_data ||
148+
OpType == ompt_target_exit_data_nowait)
149+
return std::make_pair(std::mem_fn(&Interface::beginTargetDataExit),
150+
std::mem_fn(&Interface::endTargetDataExit));
151+
152+
if constexpr (OpType == ompt_target_update ||
153+
OpType == ompt_target_update_nowait)
154+
return std::make_pair(std::mem_fn(&Interface::beginTargetUpdate),
155+
std::mem_fn(&Interface::endTargetUpdate));
156+
157+
if constexpr (OpType == ompt_target || OpType == ompt_target_nowait)
158+
return std::make_pair(std::mem_fn(&Interface::beginTarget),
159+
std::mem_fn(&Interface::endTarget));
160+
161+
// Only reached when OpType matched none of the branches above.
llvm_unreachable("Unknown target region operation type!");
162+
}
163+
164+
// Callback getter: Kernel launch operation
// Returns the beginTargetSubmit/endTargetSubmit member functions of Interface
// as a std::pair of std::mem_fn wrappers (first = begin, second = end) when
// OpType is ompt_callback_target_submit.
165+
template <ompt_callbacks_t OpType> auto getCallbacks() {
166+
// We use 'ompt_callbacks_t', because no other enum is currently available
167+
// to model a kernel launch / target submit operation.
168+
if constexpr (OpType == ompt_callback_target_submit)
169+
return std::make_pair(std::mem_fn(&Interface::beginTargetSubmit),
170+
std::mem_fn(&Interface::endTargetSubmit));
171+
172+
// Only reached for any other ompt_callbacks_t value.
llvm_unreachable("Unhandled target operation!");
173+
}
174+
111175
/// Setters for target region and target operation correlation ids
112176
void setTargetDataValue(uint64_t DataValue) { TargetData.value = DataValue; }
113177
void setTargetDataPtr(void *DataPtr) { TargetData.ptr = DataPtr; }
@@ -147,11 +211,53 @@ class Interface {
147211
void endTargetRegion();
148212
};
149213

214+
/// Thread local state for target region and associated metadata
215+
extern thread_local Interface RegionInterface;
216+
217+
/// Invoke \p Func on the thread-local RegionInterface, passing the elements
/// of the tuple-like \p Args selected by \p IndexSeq as individual arguments.
/// The unnamed std::index_sequence parameter exists only to deduce IndexSeq.
template <typename FuncTy, typename ArgsTy, size_t... IndexSeq>
218+
void InvokeInterfaceFunction(FuncTy Func, ArgsTy Args,
219+
std::index_sequence<IndexSeq...>) {
220+
std::invoke(Func, RegionInterface, std::get<IndexSeq>(Args)...);
221+
}
222+
223+
/// Scope guard around a begin/end callback pair: the constructor invokes the
/// first element of \p Callbacks and the destructor invokes the second, each
/// through performIfOmptInitialized and with the same stored arguments.
/// Both callbacks are dispatched via InvokeInterfaceFunction.
///
/// NOTE(review): the constructor and destructor each check the initialized
/// flag independently, so a change of that flag during the object's lifetime
/// could leave a begin without a matching end (or vice versa) - confirm this
/// is acceptable.
/// NOTE(review): copy operations are not deleted here; a copied object would
/// invoke the end callback again when it is destroyed - confirm no caller
/// copies it.
template <typename CallbackPairTy, typename... ArgsTy> class InterfaceRAII {
224+
public:
225+
/// Store copies of \p Args and of both callbacks, then invoke the begin
/// callback (only when OMPT is initialized).
InterfaceRAII(CallbackPairTy Callbacks, ArgsTy... Args)
226+
: Arguments(Args...), beginFunction(std::get<0>(Callbacks)),
227+
endFunction(std::get<1>(Callbacks)) {
228+
performIfOmptInitialized(begin());
229+
}
230+
/// Invoke the end callback (only when OMPT is initialized).
~InterfaceRAII() { performIfOmptInitialized(end()); }
231+
232+
private:
233+
/// Call beginFunction with the stored arguments expanded from the tuple.
void begin() {
234+
auto IndexSequence =
235+
std::make_index_sequence<std::tuple_size_v<decltype(Arguments)>>{};
236+
InvokeInterfaceFunction(beginFunction, Arguments, IndexSequence);
237+
}
238+
239+
/// Call endFunction with the same stored arguments expanded from the tuple.
void end() {
240+
auto IndexSequence =
241+
std::make_index_sequence<std::tuple_size_v<decltype(Arguments)>>{};
242+
InvokeInterfaceFunction(endFunction, Arguments, IndexSequence);
243+
}
244+
245+
/// Arguments captured by value at construction; passed to both callbacks.
std::tuple<ArgsTy...> Arguments;
246+
/// Callable invoked from the constructor.
typename CallbackPairTy::first_type beginFunction;
247+
/// Callable invoked from the destructor.
typename CallbackPairTy::second_type endFunction;
248+
};
249+
250+
// InterfaceRAII's class template argument deduction guide: deduces
// CallbackPairTy and ArgsTy... from the constructor arguments (taken by
// value), so the template arguments need not be spelled out at the use site.
251+
template <typename CallbackPairTy, typename... ArgsTy>
252+
InterfaceRAII(CallbackPairTy Callbacks, ArgsTy... Args)
253+
-> InterfaceRAII<CallbackPairTy, ArgsTy...>;
254+
150255
} // namespace ompt
151256
} // namespace target
152257
} // namespace omp
153258
} // namespace llvm
154-
155-
extern thread_local llvm::omp::target::ompt::Interface OmptInterface;
259+
#else
260+
#define OMPT_IF_BUILT(stmt)
261+
#endif
156262

157263
#endif // _OMPTARGET_OMPTINTERFACE_H

0 commit comments

Comments
 (0)