Skip to content

Commit 0c42764

Browse files
committed
[OFFLOAD] Add plugin with support for Intel Level Zero
1 parent e859265 commit 0c42764

27 files changed

+6586
-10
lines changed

offload/CMakeLists.txt

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,9 @@ if(DEFINED LIBOMPTARGET_BUILD_CUDA_PLUGIN OR
150150
message(WARNING "Option removed, use 'LIBOMPTARGET_PLUGINS_TO_BUILD' instead")
151151
endif()
152152

153-
set(LIBOMPTARGET_ALL_PLUGIN_TARGETS amdgpu cuda host)
153+
set(LIBOMPTARGET_ALL_PLUGIN_TARGETS amdgpu cuda host level_zero)
154154
set(LIBOMPTARGET_PLUGINS_TO_BUILD "all" CACHE STRING
155-
"Semicolon-separated list of plugins to use: cuda, amdgpu, host or \"all\".")
155+
"Semicolon-separated list of plugins to use: cuda, amdgpu, level_zero, host or \"all\".")
156156

157157
if(LIBOMPTARGET_PLUGINS_TO_BUILD STREQUAL "all")
158158
set(LIBOMPTARGET_PLUGINS_TO_BUILD ${LIBOMPTARGET_ALL_PLUGIN_TARGETS})
@@ -176,6 +176,19 @@ if(NOT (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$"
176176
list(REMOVE_ITEM LIBOMPTARGET_PLUGINS_TO_BUILD "cuda")
177177
endif()
178178
endif()
179+
# Drop the Level Zero plugin from the build list unless the host is an
# x86_64/AMD64 Linux or Windows system. The processor regex is anchored on both
# ends so that only those exact processor names are accepted, and the status
# message lists exactly the platforms the condition allows.
if(NOT (CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$" AND
        CMAKE_SYSTEM_NAME MATCHES "Linux|Windows"))
  if("level_zero" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
    message(STATUS "Not building Level Zero plugin: it is only supported on "
                   "Linux/Windows x86_64 hosts")
    list(REMOVE_ITEM LIBOMPTARGET_PLUGINS_TO_BUILD "level_zero")
  endif()
endif()
187+
# Remove the Level Zero plugin from the build list when
# LIBOMPTARGET_DEP_LEVEL_ZERO_FOUND is false.
if(NOT LIBOMPTARGET_DEP_LEVEL_ZERO_FOUND)
  if("level_zero" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
    message(STATUS "Not building Level Zero plugin: dependencies not found")
    list(REMOVE_ITEM LIBOMPTARGET_PLUGINS_TO_BUILD "level_zero")
  endif()
endif()
179192
message(STATUS "Building the offload library with support for "
180193
"the \"${LIBOMPTARGET_PLUGINS_TO_BUILD}\" plugins")
181194

offload/cmake/Modules/LibomptargetGetDependencies.cmake

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,4 +89,25 @@ if(LIBOMPTARGET_AMDGPU_ARCH)
8989
endif()
9090
endif()
9191

92+
################################################################################
# Looking for Level Zero
################################################################################
# LIBOMPTARGET_DEP_LEVEL_ZERO_FOUND reflects only whether the
# level_zero/ze_api.h header was located. The ze_loader library is searched
# for only once the header is found, and a missing library is reported but
# does not reset the FOUND flag.
message(STATUS "Looking for Level0 includes.")
find_path(LIBOMPTARGET_DEP_LEVEL_ZERO_INCLUDE_DIRS NAMES level_zero/ze_api.h)

if(LIBOMPTARGET_DEP_LEVEL_ZERO_INCLUDE_DIRS)
  set(LIBOMPTARGET_DEP_LEVEL_ZERO_FOUND TRUE)
  message(STATUS "Level Zero include DIR: ${LIBOMPTARGET_DEP_LEVEL_ZERO_INCLUDE_DIRS}")

  message(STATUS "Looking for Level Zero library.")
  find_library(LIBOMPTARGET_DEP_LEVEL_ZERO_LIBRARIES NAMES ze_loader)
  if(LIBOMPTARGET_DEP_LEVEL_ZERO_LIBRARIES)
    message(STATUS "Level Zero library: ${LIBOMPTARGET_DEP_LEVEL_ZERO_LIBRARIES}")
  else()
    message(STATUS "Could NOT find Level Zero. Missing library.")
  endif()
else()
  set(LIBOMPTARGET_DEP_LEVEL_ZERO_FOUND FALSE)
  message(STATUS "Could NOT find Level Zero. Missing includes.")
endif()
112+
92113
set(OPENMP_PTHREAD_LIB ${LLVM_PTHREAD_LIB})

offload/include/OpenMP/InteropAPI.h

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -160,17 +160,12 @@ struct InteropTableEntry {
160160
Interops.push_back(obj);
161161
}
162162

163-
template <class ClearFuncTy> void clear(ClearFuncTy f) {
164-
for (auto &Obj : Interops) {
165-
f(Obj);
166-
}
167-
}
168-
169163
/// vector interface
170164
int size() const { return Interops.size(); }
171165
iterator begin() { return Interops.begin(); }
172166
iterator end() { return Interops.end(); }
173167
iterator erase(iterator it) { return Interops.erase(it); }
168+
void clear() { Interops.clear(); }
174169
};
175170

176171
struct InteropTblTy

offload/include/PerThreadTable.h

Lines changed: 153 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,93 @@
1616
#include <list>
1717
#include <memory>
1818
#include <mutex>
19+
#include <type_traits>
20+
21+
/// Hands out one lazily created ObjectType per thread and remembers every
/// per-thread record in a mutex-protected list so that clear() can visit the
/// objects created by all threads.
template <typename ObjectType> struct PerThread {
  /// Per-thread record; ThEntry stays null until the owning thread first
  /// requests its object.
  struct PerThreadData {
    std::unique_ptr<ObjectType> ThEntry;
  };

  /// Protects ThreadDataList.
  std::mutex Mtx;
  /// One shared record per thread that has called into this table.
  std::list<std::shared_ptr<PerThreadData>> ThreadDataList;

  // Default-constructible only; copying and moving are disabled.
  PerThread() = default;
  PerThread(const PerThread &) = delete;
  PerThread(PerThread &&) = delete;
  PerThread &operator=(const PerThread &) = delete;
  PerThread &operator=(PerThread &&) = delete;
  ~PerThread() {
    std::lock_guard<std::mutex> Guard(Mtx);
    ThreadDataList.clear();
  }

private:
  /// Returns the calling thread's record, creating and registering it in
  /// ThreadDataList on first use.
  /// NOTE(review): the thread_local pointer is a function-local static, so it
  /// is not tied to a particular PerThread instance and is not reset by
  /// clear() -- confirm that this is the intended lifetime.
  PerThreadData &getThreadData() {
    static thread_local std::shared_ptr<PerThreadData> Local;
    if (Local)
      return *Local;
    Local = std::make_shared<PerThreadData>();
    std::lock_guard<std::mutex> Guard(Mtx);
    ThreadDataList.push_back(Local);
    return *Local;
  }

protected:
  /// Returns the calling thread's object, default-constructing it if it does
  /// not exist yet.
  ObjectType &getThreadEntry() {
    PerThreadData &Data = getThreadData();
    if (!Data.ThEntry)
      Data.ThEntry = std::make_unique<ObjectType>();
    return *Data.ThEntry;
  }

public:
  /// Public accessor for the calling thread's object.
  ObjectType &get() { return getThreadEntry(); }

  /// Calls f on every object that has been created, then empties the list of
  /// registered records. Runs with Mtx held.
  template <class F> void clear(F f) {
    std::lock_guard<std::mutex> Guard(Mtx);
    for (const auto &Data : ThreadDataList)
      if (Data->ThEntry)
        f(*Data->ThEntry);
    ThreadDataList.clear();
  }
};
1973

2074
// Using an STL container (such as std::vector) indexed by thread ID has
2175
// too many race-condition issues, so we store each thread entry into a
2276
// thread_local variable.
2377
// ContainerType is the container type used to store the objects, e.g., std::vector,
2478
// std::set, etc. by each thread. ObjectType is the type of the stored objects, e.g.,
2579
// omp_interop_val_t *, ...
26-
2780
template <typename ContainerType, typename ObjectType> struct PerThreadTable {
2881
using iterator = typename ContainerType::iterator;
2982

83+
// Compile-time detection traits. Each primary template is false; the partial
// specialization is selected (and is true) only when the probed type or
// expression is well-formed for T.

/// True when T declares a nested `iterator` type.
template <typename T, typename = void> struct has_iterator : std::false_type {};
template <typename T>
struct has_iterator<T, std::void_t<typename T::iterator>> : std::true_type {};

/// True when `T().clear()` is a valid expression.
template <typename T, typename = void> struct has_clear : std::false_type {};
template <typename T>
struct has_clear<T, std::void_t<decltype(std::declval<T>().clear())>>
    : std::true_type {};

/// True when `T().clearAll(1)` is a valid expression, i.e. T has a clearAll
/// member callable with one argument.
template <typename T, typename = void>
struct has_clearAll : std::false_type {};
template <typename T>
struct has_clearAll<T, std::void_t<decltype(std::declval<T>().clearAll(1))>>
    : std::true_type {};

/// True when T declares a nested `mapped_type`.
template <typename T, typename = void>
struct is_associative : std::false_type {};
template <typename T>
struct is_associative<T, std::void_t<typename T::mapped_type>>
    : std::true_type {};
105+
30106
struct PerThreadData {
31107
size_t NElements = 0;
32108
std::unique_ptr<ContainerType> ThEntry;
@@ -71,6 +147,11 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
71147
return ThData.NElements;
72148
}
73149

150+
void setNElements(size_t Size) {
151+
auto &NElements = getThreadNElements();
152+
NElements = Size;
153+
}
154+
74155
public:
75156
void add(ObjectType obj) {
76157
auto &Entry = getThreadEntry();
@@ -104,11 +185,81 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
104185
for (auto ThData : ThreadDataList) {
105186
if (!ThData->ThEntry || ThData->NElements == 0)
106187
continue;
107-
ThData->ThEntry->clear(f);
188+
// Choose how to clear the container at compile time from the interface
// ContainerType exposes.
if constexpr (has_clearAll<ContainerType>::value) {
  // Delegate to the container's own clearAll, handing it the callback.
  ThData->ThEntry->clearAll(f);
} else if constexpr (has_iterator<ContainerType>::value &&
                     has_clear<ContainerType>::value) {
  // Invoke the callback on each stored object, then empty the container.
  for (auto &Obj : *ThData->ThEntry) {
    if constexpr (is_associative<ContainerType>::value) {
      // Containers with a mapped_type iterate over pairs; pass the value.
      f(Obj.second);
    } else {
      f(Obj);
    }
  }
  ThData->ThEntry->clear();
} else {
  // The condition depends on ContainerType, so it is only checked when this
  // branch is actually instantiated -- where it is always false. A plain
  // static_assert(true, ...) would never fire and would silently skip
  // unsupported containers.
  static_assert(has_clearAll<ContainerType>::value,
                "Container type not supported");
}
108203
ThData->NElements = 0;
109204
}
110205
ThreadDataList.clear();
111206
}
112207
};
113208

209+
/// Type of the objects stored in a container: `mapped_type` when the
/// container declares one, `value_type` otherwise.
template <typename C, typename = void> struct ContainerValueType {
  using type = typename C::value_type;
};
template <typename C>
struct ContainerValueType<C, std::void_t<typename C::mapped_type>> {
  using type = typename C::mapped_type;
};
216+
217+
template <typename ContainerType, size_t reserveSize = 0>
218+
struct PerThreadContainer
219+
: public PerThreadTable<ContainerType,
220+
typename ContainerValueType<ContainerType>::type> {
221+
222+
// helpers
223+
template <typename T, typename = std::void_t<>> struct indexType {
224+
using type = typename T::size_type;
225+
};
226+
template <typename T> struct indexType<T, std::void_t<typename T::key_type>> {
227+
using type = typename T::key_type;
228+
};
229+
template <typename T, typename = std::void_t<>>
230+
struct has_resize : std::false_type {};
231+
template <typename T>
232+
struct has_resize<T, std::void_t<decltype(std::declval<T>().resize(1))>>
233+
: std::true_type {};
234+
235+
template <typename T, typename = std::void_t<>>
236+
struct has_reserve : std::false_type {};
237+
template <typename T>
238+
struct has_reserve<T, std::void_t<decltype(std::declval<T>().reserve(1))>>
239+
: std::true_type {};
240+
241+
using IndexType = typename indexType<ContainerType>::type;
242+
using ObjectType = typename ContainerValueType<ContainerType>::type;
243+
244+
// Get the object for the given index in the current thread
245+
ObjectType &get(IndexType Index) {
246+
auto &Entry = this->getThreadEntry();
247+
248+
// specialized code for vector-like containers
249+
if constexpr (has_resize<ContainerType>::value) {
250+
if (Index >= Entry.size()) {
251+
if constexpr (has_reserve<ContainerType>::value && reserveSize > 0) {
252+
if (Entry.capacity() < reserveSize)
253+
Entry.reserve(reserveSize);
254+
}
255+
// If the index is out of bounds, try resize the container
256+
Entry.resize(Index + 1);
257+
}
258+
}
259+
ObjectType &Ret = Entry[Index];
260+
this->setNElements(Entry.size());
261+
return Ret;
262+
}
263+
};
264+
114265
#endif

offload/plugins-nextgen/common/include/DLWrap.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,5 +282,21 @@ template <size_t Requested, size_t Required> constexpr void verboseAssert() {
282282
return dlwrap::SYM_USE##_Trait::get()(x0, x1, x2, x3, x4, x5, x6, x7, x8, \
283283
x9, x10); \
284284
}
285+
// Defines SYM_DEF as a function taking twelve arguments (x0..x11), whose types
// are T::arg<0>::type .. T::arg<11>::type and whose return type is
// T::ReturnType, and which forwards all twelve arguments to the callable
// returned by dlwrap::SYM_USE##_Trait::get().
#define DLWRAP_INSTANTIATE_12(SYM_DEF, SYM_USE, T) \
286+
T::ReturnType SYM_DEF(typename T::template arg<0>::type x0, \
287+
typename T::template arg<1>::type x1, \
288+
typename T::template arg<2>::type x2, \
289+
typename T::template arg<3>::type x3, \
290+
typename T::template arg<4>::type x4, \
291+
typename T::template arg<5>::type x5, \
292+
typename T::template arg<6>::type x6, \
293+
typename T::template arg<7>::type x7, \
294+
typename T::template arg<8>::type x8, \
295+
typename T::template arg<9>::type x9, \
296+
typename T::template arg<10>::type x10, \
297+
typename T::template arg<11>::type x11) { \
298+
return dlwrap::SYM_USE##_Trait::get()(x0, x1, x2, x3, x4, x5, x6, x7, x8, \
299+
x9, x10, x11); \
300+
}
285301

286302
#endif // OMPTARGET_SHARED_DLWRAP_H
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Nothing to configure when LIBOMPTARGET_DEP_LEVEL_ZERO_FOUND is false.
if(NOT LIBOMPTARGET_DEP_LEVEL_ZERO_FOUND)
  return()
endif()

# Create the library and add the default arguments.
add_target_library(omptarget.rtl.level_zero LEVEL_ZERO)

# Sources of the plugin. Each file is listed exactly once
# (src/L0Program.cpp previously appeared twice in this list).
set(LEVEL_ZERO_SRC_FILES
  src/L0Context.cpp
  src/L0Device.cpp
  src/L0Kernel.cpp
  src/L0Memory.cpp
  src/L0Program.cpp
  src/L0Plugin.cpp
  src/L0Options.cpp
  src/OmpWrapper.cpp
)

target_sources(omptarget.rtl.level_zero PRIVATE
  ${LEVEL_ZERO_SRC_FILES}
)
25+
26+
# Private include paths: the plugin's own include/ and src/ directories first,
# followed by the include locations provided through the LIBOMPTARGET_*
# variables.
target_include_directories(omptarget.rtl.level_zero PRIVATE
  ${CMAKE_CURRENT_SOURCE_DIR}/include
  ${CMAKE_CURRENT_SOURCE_DIR}/src
  ${LIBOMPTARGET_INCLUDE_DIR}
  ${LIBOMPTARGET_DEP_LEVEL_ZERO_INCLUDE_DIRS}
  ${LIBOMPTARGET_LLVM_INCLUDE_DIRS}
  ${LIBOMPTARGET_OMP_HEADER_DIR}
)
37+
38+
# Link directly against the Level Zero library when it exists on disk and
# "level_zero" is not listed in LIBOMPTARGET_DLOPEN_PLUGINS; otherwise build
# the dlopen wrapper and pass it the library file name to load.
# The path is quoted so that an empty or unset variable cannot leave the
# EXISTS operator without an operand.
if(EXISTS "${LIBOMPTARGET_DEP_LEVEL_ZERO_LIBRARIES}" AND
   NOT "level_zero" IN_LIST LIBOMPTARGET_DLOPEN_PLUGINS)
  message(STATUS "Building Level Zero NG plugin linked against level_zero library")

  if(CMAKE_SYSTEM_NAME MATCHES "Linux")
    target_link_libraries(omptarget.rtl.level_zero PRIVATE
      ${LIBOMPTARGET_DEP_LEVEL_ZERO_LIBRARIES})
  elseif(CMAKE_SYSTEM_NAME MATCHES "Windows")
    # Full path to the L0 library is recognized as a linker option, so we
    # separate directory and file name
    get_filename_component(LEVEL_ZERO_LIBRARY_PATH
      "${LIBOMPTARGET_DEP_LEVEL_ZERO_LIBRARIES}" DIRECTORY)
    get_filename_component(LEVEL_ZERO_LIBRARY_NAME
      "${LIBOMPTARGET_DEP_LEVEL_ZERO_LIBRARIES}" NAME)
    target_link_libraries(omptarget.rtl.level_zero PRIVATE
      ${LEVEL_ZERO_LIBRARY_NAME} ${LIBOMP_LIB_FILE})
    target_link_directories(omptarget.rtl.level_zero PRIVATE
      ${LEVEL_ZERO_LIBRARY_PATH})
    target_link_options(omptarget.rtl.level_zero PRIVATE
      "LINKER:-def:${CMAKE_CURRENT_SOURCE_DIR}/src/rtl.def")
    libomptarget_add_resource_file(omptarget.rtl.level_zero)
  else()
    message(FATAL_ERROR "Missing platform support")
  endif()

else()
  message(STATUS "Building Level Zero NG plugin for dlopened level_zero")
  if(LIBOMPTARGET_DEP_LEVEL_ZERO_LIBRARIES)
    get_filename_component(LEVEL_ZERO_LIBRARY_NAME
      "${LIBOMPTARGET_DEP_LEVEL_ZERO_LIBRARIES}" NAME)
  else()
    # find_library() left a *-NOTFOUND value, which must not be baked into the
    # binary as the library name. Fall back to the platform's shared-library
    # naming applied to "ze_loader", the name used for the library lookup.
    # NOTE(review): confirm this matches the file name available at runtime.
    set(LEVEL_ZERO_LIBRARY_NAME
      "${CMAKE_SHARED_LIBRARY_PREFIX}ze_loader${CMAKE_SHARED_LIBRARY_SUFFIX}")
  endif()
  if(CMAKE_SYSTEM_NAME MATCHES "Windows")
    # Windows uses dll instead of lib files at runtime
    string(REGEX REPLACE "lib$" "dll" LEVEL_ZERO_LIBRARY_NAME
      "${LEVEL_ZERO_LIBRARY_NAME}")
  endif()
  # A preprocessor definition belongs in the target's compile definitions, not
  # in its raw compile options.
  target_compile_definitions(omptarget.rtl.level_zero PRIVATE
    "LEVEL_ZERO_LIBRARY=\"${LEVEL_ZERO_LIBRARY_NAME}\"")
  target_sources(omptarget.rtl.level_zero PRIVATE src/L0DynWrapper.cpp)
endif()

0 commit comments

Comments
 (0)