Conversation

adurang
Contributor

@adurang adurang commented Sep 16, 2025

Add a new nextgen plugin that supports GPU devices through the Intel oneAPI Level Zero library.

This PR also adds new support to the PerThread auxiliary classes that are used by the plugin.

@llvmbot
Member

llvmbot commented Sep 16, 2025

@llvm/pr-subscribers-offload

Author: Alex Duran (adurang)

Changes

Add a new nextgen plugin that supports GPU devices through the Intel oneAPI Level Zero library.

This PR also adds new support to the PerThread auxiliary classes that are used by the plugin.


Patch is 250.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/158900.diff

27 Files Affected:

  • (modified) offload/CMakeLists.txt (+15-2)
  • (modified) offload/cmake/Modules/LibomptargetGetDependencies.cmake (+21)
  • (modified) offload/include/OpenMP/InteropAPI.h (+1-6)
  • (modified) offload/include/PerThreadTable.h (+153-2)
  • (modified) offload/plugins-nextgen/common/include/DLWrap.h (+16)
  • (added) offload/plugins-nextgen/level_zero/CMakeLists.txt (+69)
  • (added) offload/plugins-nextgen/level_zero/include/AsyncQueue.h (+50)
  • (added) offload/plugins-nextgen/level_zero/include/L0Context.h (+138)
  • (added) offload/plugins-nextgen/level_zero/include/L0Defs.h (+73)
  • (added) offload/plugins-nextgen/level_zero/include/L0Device.h (+680)
  • (added) offload/plugins-nextgen/level_zero/include/L0Interop.h (+25)
  • (added) offload/plugins-nextgen/level_zero/include/L0Kernel.h (+154)
  • (added) offload/plugins-nextgen/level_zero/include/L0Memory.h (+574)
  • (added) offload/plugins-nextgen/level_zero/include/L0Options.h (+189)
  • (added) offload/plugins-nextgen/level_zero/include/L0Plugin.h (+136)
  • (added) offload/plugins-nextgen/level_zero/include/L0Program.h (+135)
  • (added) offload/plugins-nextgen/level_zero/include/L0Trace.h (+193)
  • (added) offload/plugins-nextgen/level_zero/include/TLS.h (+86)
  • (added) offload/plugins-nextgen/level_zero/src/L0Context.cpp (+41)
  • (added) offload/plugins-nextgen/level_zero/src/L0Device.cpp (+1065)
  • (added) offload/plugins-nextgen/level_zero/src/L0DynWrapper.cpp (+134)
  • (added) offload/plugins-nextgen/level_zero/src/L0Kernel.cpp (+649)
  • (added) offload/plugins-nextgen/level_zero/src/L0Memory.cpp (+637)
  • (added) offload/plugins-nextgen/level_zero/src/L0Options.cpp (+371)
  • (added) offload/plugins-nextgen/level_zero/src/L0Plugin.cpp (+285)
  • (added) offload/plugins-nextgen/level_zero/src/L0Program.cpp (+625)
  • (added) offload/plugins-nextgen/level_zero/src/OmpWrapper.cpp (+71)
diff --git a/offload/CMakeLists.txt b/offload/CMakeLists.txt
index b277380783500..8a704ab05eb53 100644
--- a/offload/CMakeLists.txt
+++ b/offload/CMakeLists.txt
@@ -150,9 +150,9 @@ if(DEFINED LIBOMPTARGET_BUILD_CUDA_PLUGIN OR
   message(WARNING "Option removed, use 'LIBOMPTARGET_PLUGINS_TO_BUILD' instead")
 endif()
 
-set(LIBOMPTARGET_ALL_PLUGIN_TARGETS amdgpu cuda host)
+set(LIBOMPTARGET_ALL_PLUGIN_TARGETS amdgpu cuda host level_zero)
 set(LIBOMPTARGET_PLUGINS_TO_BUILD "all" CACHE STRING
-    "Semicolon-separated list of plugins to use: cuda, amdgpu, host or \"all\".")
+    "Semicolon-separated list of plugins to use: cuda, amdgpu, level_zero, host or \"all\".")
 
 if(LIBOMPTARGET_PLUGINS_TO_BUILD STREQUAL "all")
   set(LIBOMPTARGET_PLUGINS_TO_BUILD ${LIBOMPTARGET_ALL_PLUGIN_TARGETS})
@@ -176,6 +176,19 @@ if(NOT (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(ppc64le)|(aarch64)$"
     list(REMOVE_ITEM LIBOMPTARGET_PLUGINS_TO_BUILD "cuda")
   endif()
 endif()
+if(NOT (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64)$" AND
+        CMAKE_SYSTEM_NAME MATCHES "Linux|Windows"))
+  if("level_zero" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
+    message(STATUS "Not building Level Zero plugin: it is only supported on "
+                   "Linux/Windows x86_64 hosts")
+    list(REMOVE_ITEM LIBOMPTARGET_PLUGINS_TO_BUILD "level_zero")
+  endif()
+endif()
+if("level_zero" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD AND
+   NOT LIBOMPTARGET_DEP_LEVEL_ZERO_FOUND)
+    message(STATUS "Not building Level Zero plugin: dependencies not found")
+    list(REMOVE_ITEM LIBOMPTARGET_PLUGINS_TO_BUILD "level_zero")
+endif()
 message(STATUS "Building the offload library with support for "
                "the \"${LIBOMPTARGET_PLUGINS_TO_BUILD}\" plugins")
 
diff --git a/offload/cmake/Modules/LibomptargetGetDependencies.cmake b/offload/cmake/Modules/LibomptargetGetDependencies.cmake
index 2a8bdebf2c1dd..0af0ae1ecdbec 100644
--- a/offload/cmake/Modules/LibomptargetGetDependencies.cmake
+++ b/offload/cmake/Modules/LibomptargetGetDependencies.cmake
@@ -89,4 +89,25 @@ if(LIBOMPTARGET_AMDGPU_ARCH)
   endif()
 endif()
 
+################################################################################
+# Looking for Level Zero
+################################################################################
+message(STATUS "Looking for Level Zero includes.")
+find_path(LIBOMPTARGET_DEP_LEVEL_ZERO_INCLUDE_DIRS NAMES level_zero/ze_api.h)
+
+if(NOT LIBOMPTARGET_DEP_LEVEL_ZERO_INCLUDE_DIRS)
+  set(LIBOMPTARGET_DEP_LEVEL_ZERO_FOUND FALSE)
+  message(STATUS "Could NOT find Level Zero. Missing includes.")
+else()
+  message(STATUS "Level Zero include DIR: ${LIBOMPTARGET_DEP_LEVEL_ZERO_INCLUDE_DIRS}")
+  set(LIBOMPTARGET_DEP_LEVEL_ZERO_FOUND TRUE)
+  message(STATUS "Looking for Level Zero library.")
+  find_library(LIBOMPTARGET_DEP_LEVEL_ZERO_LIBRARIES NAMES ze_loader)
+  if(NOT LIBOMPTARGET_DEP_LEVEL_ZERO_LIBRARIES)
+    message(STATUS "Could NOT find Level Zero. Missing library.")
+  else()
+    message(STATUS "Level Zero library: ${LIBOMPTARGET_DEP_LEVEL_ZERO_LIBRARIES}")
+  endif()
+endif()
+
 set(OPENMP_PTHREAD_LIB ${LLVM_PTHREAD_LIB})
diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h
index 53ac4be2e2e98..2553bfa930784 100644
--- a/offload/include/OpenMP/InteropAPI.h
+++ b/offload/include/OpenMP/InteropAPI.h
@@ -160,17 +160,12 @@ struct InteropTableEntry {
     Interops.push_back(obj);
   }
 
-  template <class ClearFuncTy> void clear(ClearFuncTy f) {
-    for (auto &Obj : Interops) {
-      f(Obj);
-    }
-  }
-
   /// vector interface
   int size() const { return Interops.size(); }
   iterator begin() { return Interops.begin(); }
   iterator end() { return Interops.end(); }
   iterator erase(iterator it) { return Interops.erase(it); }
+  void clear() { Interops.clear(); }
 };
 
 struct InteropTblTy
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 45b196171b4c8..0241370953c67 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -16,6 +16,60 @@
 #include <list>
 #include <memory>
 #include <mutex>
+#include <type_traits>
+
+template <typename ObjectType> struct PerThread {
+  struct PerThreadData {
+    std::unique_ptr<ObjectType> ThEntry;
+  };
+
+  std::mutex Mtx;
+  std::list<std::shared_ptr<PerThreadData>> ThreadDataList;
+
+  // define default constructors, disable copy and move constructors
+  PerThread() = default;
+  PerThread(const PerThread &) = delete;
+  PerThread(PerThread &&) = delete;
+  PerThread &operator=(const PerThread &) = delete;
+  PerThread &operator=(PerThread &&) = delete;
+  ~PerThread() {
+    std::lock_guard<std::mutex> Lock(Mtx);
+    ThreadDataList.clear();
+  }
+
+private:
+  PerThreadData &getThreadData() {
+    static thread_local std::shared_ptr<PerThreadData> ThData = nullptr;
+    if (!ThData) {
+      ThData = std::make_shared<PerThreadData>();
+      std::lock_guard<std::mutex> Lock(Mtx);
+      ThreadDataList.push_back(ThData);
+    }
+    return *ThData;
+  }
+
+protected:
+  ObjectType &getThreadEntry() {
+    auto &ThData = getThreadData();
+    if (ThData.ThEntry)
+      return *ThData.ThEntry;
+    ThData.ThEntry = std::make_unique<ObjectType>();
+    return *ThData.ThEntry;
+  }
+
+public:
+  ObjectType &get() { return getThreadEntry(); }
+
+  template <class F> void clear(F f) {
+    std::lock_guard<std::mutex> Lock(Mtx);
+    for (auto ThData : ThreadDataList) {
+      if (!ThData->ThEntry)
+        continue;
+      f(*ThData->ThEntry);
+    }
+    ThreadDataList.clear();
+  }
+};
 
 // Using an STL container (such as std::vector) indexed by thread ID has
// too many race-condition issues, so we store each thread entry into a
@@ -23,10 +77,32 @@
 // T is the container type used to store the objects, e.g., std::vector,
 // std::set, etc. by each thread. O is the type of the stored objects e.g.,
 // omp_interop_val_t *, ...
-
 template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   using iterator = typename ContainerType::iterator;
 
+  template <typename, typename = std::void_t<>>
+  struct has_iterator : std::false_type {};
+  template <typename T>
+  struct has_iterator<T, std::void_t<typename T::iterator>> : std::true_type {};
+
+  template <typename T, typename = std::void_t<>>
+  struct has_clear : std::false_type {};
+  template <typename T>
+  struct has_clear<T, std::void_t<decltype(std::declval<T>().clear())>>
+      : std::true_type {};
+
+  template <typename T, typename = std::void_t<>>
+  struct has_clearAll : std::false_type {};
+  template <typename T>
+  struct has_clearAll<T, std::void_t<decltype(std::declval<T>().clearAll(1))>>
+      : std::true_type {};
+
+  template <typename, typename = std::void_t<>>
+  struct is_associative : std::false_type {};
+  template <typename T>
+  struct is_associative<T, std::void_t<typename T::mapped_type>>
+      : std::true_type {};
+
   struct PerThreadData {
     size_t NElements = 0;
     std::unique_ptr<ContainerType> ThEntry;
@@ -71,6 +147,11 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
     return ThData.NElements;
   }
 
+  void setNElements(size_t Size) {
+    auto &NElements = getThreadNElements();
+    NElements = Size;
+  }
+
 public:
   void add(ObjectType obj) {
     auto &Entry = getThreadEntry();
@@ -104,11 +185,81 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
     for (auto ThData : ThreadDataList) {
       if (!ThData->ThEntry || ThData->NElements == 0)
         continue;
-      ThData->ThEntry->clear(f);
+      if constexpr (has_clearAll<ContainerType>::value) {
+        ThData->ThEntry->clearAll(f);
+      } else if constexpr (has_iterator<ContainerType>::value &&
+                           has_clear<ContainerType>::value) {
+        for (auto &Obj : *ThData->ThEntry) {
+          if constexpr (is_associative<ContainerType>::value) {
+            f(Obj.second);
+          } else {
+            f(Obj);
+          }
+        }
+        ThData->ThEntry->clear();
+      } else {
+        static_assert(sizeof(ContainerType) == 0, "Container type not supported");
+      }
       ThData->NElements = 0;
     }
     ThreadDataList.clear();
   }
 };
 
+template <typename T, typename = std::void_t<>> struct ContainerValueType {
+  using type = typename T::value_type;
+};
+template <typename T>
+struct ContainerValueType<T, std::void_t<typename T::mapped_type>> {
+  using type = typename T::mapped_type;
+};
+
+template <typename ContainerType, size_t reserveSize = 0>
+struct PerThreadContainer
+    : public PerThreadTable<ContainerType,
+                            typename ContainerValueType<ContainerType>::type> {
+
+  // helpers
+  template <typename T, typename = std::void_t<>> struct indexType {
+    using type = typename T::size_type;
+  };
+  template <typename T> struct indexType<T, std::void_t<typename T::key_type>> {
+    using type = typename T::key_type;
+  };
+  template <typename T, typename = std::void_t<>>
+  struct has_resize : std::false_type {};
+  template <typename T>
+  struct has_resize<T, std::void_t<decltype(std::declval<T>().resize(1))>>
+      : std::true_type {};
+
+  template <typename T, typename = std::void_t<>>
+  struct has_reserve : std::false_type {};
+  template <typename T>
+  struct has_reserve<T, std::void_t<decltype(std::declval<T>().reserve(1))>>
+      : std::true_type {};
+
+  using IndexType = typename indexType<ContainerType>::type;
+  using ObjectType = typename ContainerValueType<ContainerType>::type;
+
+  // Get the object for the given index in the current thread
+  ObjectType &get(IndexType Index) {
+    auto &Entry = this->getThreadEntry();
+
+    // specialized code for vector-like containers
+    if constexpr (has_resize<ContainerType>::value) {
+      if (Index >= Entry.size()) {
+        if constexpr (has_reserve<ContainerType>::value && reserveSize > 0) {
+          if (Entry.capacity() < reserveSize)
+            Entry.reserve(reserveSize);
+        }
+        // If the index is out of bounds, try to resize the container
+        Entry.resize(Index + 1);
+      }
+    }
+    ObjectType &Ret = Entry[Index];
+    this->setNElements(Entry.size());
+    return Ret;
+  }
+};
+
 #endif
diff --git a/offload/plugins-nextgen/common/include/DLWrap.h b/offload/plugins-nextgen/common/include/DLWrap.h
index 8934e7e701021..95ce86e123cd3 100644
--- a/offload/plugins-nextgen/common/include/DLWrap.h
+++ b/offload/plugins-nextgen/common/include/DLWrap.h
@@ -282,5 +282,21 @@ template <size_t Requested, size_t Required> constexpr void verboseAssert() {
     return dlwrap::SYM_USE##_Trait::get()(x0, x1, x2, x3, x4, x5, x6, x7, x8,  \
                                           x9, x10);                            \
   }
+#define DLWRAP_INSTANTIATE_12(SYM_DEF, SYM_USE, T)                             \
+  T::ReturnType SYM_DEF(typename T::template arg<0>::type x0,                  \
+                        typename T::template arg<1>::type x1,                  \
+                        typename T::template arg<2>::type x2,                  \
+                        typename T::template arg<3>::type x3,                  \
+                        typename T::template arg<4>::type x4,                  \
+                        typename T::template arg<5>::type x5,                  \
+                        typename T::template arg<6>::type x6,                  \
+                        typename T::template arg<7>::type x7,                  \
+                        typename T::template arg<8>::type x8,                  \
+                        typename T::template arg<9>::type x9,                  \
+                        typename T::template arg<10>::type x10,                \
+                        typename T::template arg<11>::type x11) {              \
+    return dlwrap::SYM_USE##_Trait::get()(x0, x1, x2, x3, x4, x5, x6, x7, x8,  \
+                                          x9, x10, x11);                       \
+  }
 
 #endif // OMPTARGET_SHARED_DLWRAP_H
diff --git a/offload/plugins-nextgen/level_zero/CMakeLists.txt b/offload/plugins-nextgen/level_zero/CMakeLists.txt
new file mode 100644
index 0000000000000..b9c8dd423c3ca
--- /dev/null
+++ b/offload/plugins-nextgen/level_zero/CMakeLists.txt
@@ -0,0 +1,69 @@
+if(NOT LIBOMPTARGET_DEP_LEVEL_ZERO_FOUND)
+  return()
+endif()
+
+# Create the library and add the default arguments.
+add_target_library(omptarget.rtl.level_zero LEVEL_ZERO)
+
+set(LEVEL_ZERO_SRC_FILES
+        src/L0Context.cpp
+        src/L0Device.cpp
+        src/L0Kernel.cpp
+        src/L0Memory.cpp
+        src/L0Program.cpp
+        src/L0Plugin.cpp
+        src/L0Options.cpp
+)
+list(APPEND LEVEL_ZERO_SRC_FILES
+        src/OmpWrapper.cpp
+)
+
+target_sources(omptarget.rtl.level_zero PRIVATE
+   ${LEVEL_ZERO_SRC_FILES}
+)
+
+target_include_directories(omptarget.rtl.level_zero PRIVATE
+      ${CMAKE_CURRENT_SOURCE_DIR}/include
+      ${CMAKE_CURRENT_SOURCE_DIR}/src
+)
+
+target_include_directories(omptarget.rtl.level_zero PRIVATE
+      ${LIBOMPTARGET_INCLUDE_DIR}
+      ${LIBOMPTARGET_DEP_LEVEL_ZERO_INCLUDE_DIRS}
+      ${LIBOMPTARGET_LLVM_INCLUDE_DIRS}
+      ${LIBOMPTARGET_OMP_HEADER_DIR}
+)
+
+if (EXISTS ${LIBOMPTARGET_DEP_LEVEL_ZERO_LIBRARIES} AND NOT "level_zero" IN_LIST LIBOMPTARGET_DLOPEN_PLUGINS)
+message(STATUS "Building Level Zero NG plugin linked against level_zero library")
+
+if(CMAKE_SYSTEM_NAME MATCHES "Linux")
+  target_link_libraries(omptarget.rtl.level_zero PRIVATE
+          ${LIBOMPTARGET_DEP_LEVEL_ZERO_LIBRARIES})
+elseif(CMAKE_SYSTEM_NAME MATCHES "Windows")
+  # Full path to the L0 library is recognized as a linker option, so we
+  # separate directory and file name
+  get_filename_component(LEVEL_ZERO_LIBRARY_PATH
+          ${LIBOMPTARGET_DEP_LEVEL_ZERO_LIBRARIES} DIRECTORY)
+  get_filename_component(LEVEL_ZERO_LIBRARY_NAME
+          ${LIBOMPTARGET_DEP_LEVEL_ZERO_LIBRARIES} NAME)
+  target_link_libraries(omptarget.rtl.level_zero PRIVATE
+          ${LEVEL_ZERO_LIBRARY_NAME} ${LIBOMP_LIB_FILE})
+  target_link_directories(omptarget.rtl.level_zero PRIVATE ${LEVEL_ZERO_LIBRARY_PATH})
+  target_link_options(omptarget.rtl.level_zero PRIVATE "LINKER:-def:${CMAKE_CURRENT_SOURCE_DIR}/src/rtl.def")
+  libomptarget_add_resource_file(omptarget.rtl.level_zero)
+else()
+   message(FATAL_ERROR "Missing platform support")
+endif()
+
+else()
+message(STATUS "Building Level Zero NG plugin for dlopened level_zero")
+get_filename_component(LEVEL_ZERO_LIBRARY_NAME ${LIBOMPTARGET_DEP_LEVEL_ZERO_LIBRARIES} NAME)
+if(CMAKE_SYSTEM_NAME MATCHES "Windows")
+   # Windows uses dll instead of lib files at runtime
+   string(REGEX REPLACE "lib$" "dll" LEVEL_ZERO_LIBRARY_NAME ${LEVEL_ZERO_LIBRARY_NAME})
+endif()
+target_compile_options(omptarget.rtl.level_zero PRIVATE "-DLEVEL_ZERO_LIBRARY=\"${LEVEL_ZERO_LIBRARY_NAME}\"")
+target_sources(omptarget.rtl.level_zero PRIVATE src/L0DynWrapper.cpp)
+endif()
diff --git a/offload/plugins-nextgen/level_zero/include/AsyncQueue.h b/offload/plugins-nextgen/level_zero/include/AsyncQueue.h
new file mode 100644
index 0000000000000..105f68205e402
--- /dev/null
+++ b/offload/plugins-nextgen/level_zero/include/AsyncQueue.h
@@ -0,0 +1,50 @@
+//===--- Level Zero Target RTL Implementation -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Async Queue wrapper for SPIR-V/Xe machine
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <vector>
+
+#include "L0Memory.h"
+
+namespace llvm {
+namespace omp {
+namespace target {
+namespace plugin {
+
+/// Abstract queue that supports asynchronous command submission
+struct AsyncQueueTy {
+  /// List of events attached to submitted commands
+  std::vector<ze_event_handle_t> WaitEvents;
+  /// Pending staging buffer to host copies
+  /// Pending staging-buffer-to-host copies
+  /// Pending USM memory copy commands that must wait for kernel completion
+  std::list<std::tuple<const void *, void *, size_t>> USM2MList;
+  /// Kernel event not signaled
+  ze_event_handle_t KernelEvent = nullptr;
+  /// Is this queue currently in use
+  bool InUse = false;
+  /// Clear data
+  void reset() {
+    WaitEvents.clear();
+    H2MList.clear();
+    USM2MList.clear();
+    KernelEvent = nullptr;
+  }
+};
+
+typedef ObjPool<AsyncQueueTy> AsyncQueuePoolTy;
+
+} // namespace plugin
+} // namespace target
+} // namespace omp
+} // namespace llvm
diff --git a/offload/plugins-nextgen/level_zero/include/L0Context.h b/offload/plugins-nextgen/level_zero/include/L0Context.h
new file mode 100644
index 0000000000000..b2b6def8101ca
--- /dev/null
+++ b/offload/plugins-nextgen/level_zero/include/L0Context.h
@@ -0,0 +1,138 @@
+//===--- Level Zero Target RTL Implementation -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Level Zero Context abstraction
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "L0Memory.h"
+#include "PerThreadTable.h"
+
+namespace llvm::omp::target::plugin {
+
+class LevelZeroPluginTy;
+
+class L0ContextTLSTy {
+  StagingBufferTy StagingBuffer;
+
+public:
+  auto &getStagingBuffer() { return StagingBuffer; }
+  const auto &getStagingBuffer() const { return StagingBuffer; }
+
+  void clear() { StagingBuffer.clear(); }
+};
+
+struct L0ContextTLSTableTy
+    : public PerThreadContainer<
+          std::unordered_map<ze_context_handle_t, L0ContextTLSTy>> {
+  void clear() {
+    PerThreadTable::clear([](L0ContextTLSTy &Entry) { Entry.clear(); });
+  }
+};
+
+/// Driver and context-specific resources. We assume a single context per
+/// driver.
+class L0ContextTy {
+  /// The plugin that created this context
+  LevelZeroPluginTy &Plugin;
+
+  /// Level Zero Driver handle
+  ze_driver_handle_t zeDriver = nullptr;
+
+  /// Common Level Zero context
+  ze_context_handle_t zeContext = nullptr;
+
+  /// API version supported by the Level Zero driver
+  ze_api_version_t APIVersion = ZE_API_VERSION_CURRENT;
+
+  /// Imported external pointers. Track this only for user-directed
+  /// imports/releases.
+  std::unordered_map<uintptr_t, size_t> ImportedPtrs;
+
+  /// Common event pool
+  EventPoolTy EventPool;
+
+  /// Host Memory allocator for this driver
+  MemAllocatorTy HostMemAllocator;
+
+public:
+  /// Named constants for checking the imported external pointer regions.
+  static constexpr int32_t ImportNotExist = -1;
+  static constexpr int32_t ImportUnknown = 0;
+  static constexpr int32_t ImportExist = 1;
+
+  /// Create context, initialize event pool and extension functions
+  L0ContextTy(LevelZeroPluginTy &Plugin, ze_driver_handle_t zeDriver,
+              int32_t DriverId);
+
+  L0ContextTy(const L0ContextTy &) = delete;
+  L0ContextTy(L0ContextTy &&) = delete;
+  L0ContextTy &operator=(const L0ContextTy &) = delete;
+  L0ContextTy &operator=(const L0ContextTy &&) = delete;
+
+  /// Release resources
+  ~L0ContextTy() {
+    EventPool.deinit();
+    HostMemAllocator.deinit();
+    if (zeContext)
+      CALL_ZE_RET_VOID(zeContextDestroy, zeContext);
+  }
+
+  auto &getPlugin() const { return Plugin; }
+
+  StagingBufferTy &getStagingBuffer();
+
+  /// Add imported external pointer region.
+  void addImported(void *Ptr, size_t Size) {
+    (void)ImportedPtrs.emplace((uintptr_t)Ptr, Size);
+  }
+
+  /// Remove imported external pointer region
+  void removeImported(void *Ptr) { (void)ImportedPtrs.erase((uintptr_t)Ptr); }
+
+  /// Check if imported regions contain the specified region.
+  int32_t checkImported(void *Ptr, size_t Size) const {
+    uintptr_t LB = (uintptr_t)Ptr;
+    uintptr_t UB = LB + Size;
+    // We do not expect a large number of user-directed imports, so use simple
+    // logic.
+    for (auto &I : ImportedPtrs) {
+      uintptr_t ILB = I.first;
+      uintptr_t IUB = ILB + I.seco...
[truncated]


github-actions bot commented Sep 16, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@RossBrunton RossBrunton self-requested a review September 16, 2025 11:06
@kevinsala kevinsala self-requested a review September 17, 2025 14:33
Contributor

@jhuber6 jhuber6 left a comment


Quick scan, I can't review much beyond nits because I don't know how level zero actually works.

list(REMOVE_ITEM LIBOMPTARGET_PLUGINS_TO_BUILD "cuda")
endif()
endif()
if(NOT (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64)$" AND
Contributor


Wouldn't it be better to do this check when we create the list rather than removing it later?

Contributor Author


All the other plugins are treated in the same way right now (e.g., 3 lines above). They are in the "candidate" list and if some condition is not met they're removed.

I don't disagree with your statement about doing it earlier, but we should be consistent, so maybe something to do subsequently?

// use its lock and be able to cache the group params and indirect flags
auto &KernelPR = const_cast<KernelPropertiesTy &>(getProperties());
// Protect from kernel preparation to submission as kernels are shared.
std::unique_lock<std::mutex> KernelLock(KernelPR.Mtx);
Contributor


This mutex section seems much more coarse-grained than I'd expect. I don't think the other plugins have such a large exclusive section when launching a kernel.

@jhuber6
Contributor

jhuber6 commented Sep 30, 2025

By the way unless this passes all the existing tests it should be disabled by default.

@adurang
Contributor Author

adurang commented Sep 30, 2025

By the way unless this passes all the existing tests it should be disabled by default.

There's no testing in this PR.

@jhuber6
Contributor

jhuber6 commented Sep 30, 2025

By the way unless this passes all the existing tests it should be disabled by default.

There's no testing in this PR.

It probably doesn't enable it as a backend for the OpenMP tests, but I'd imagine it would show up in platform detection for the offloading ones.

@adurang
Contributor Author

adurang commented Sep 30, 2025

By the way unless this passes all the existing tests it should be disabled by default.

There's no testing in this PR.

It probably doesn't enable it as a backend for the OpenMP tests, but I'd imagine it would show up in platform detection for the offloading ones.

Sorry for my ignorance but do you mean the ones in "unittests"?

@adurang
Contributor Author

adurang commented Oct 1, 2025

By the way unless this passes all the existing tests it should be disabled by default.

There's no testing in this PR.

It probably doesn't enable it as a backend for the OpenMP tests, but I'd imagine it would show up in platform detection for the offloading ones.

I checked this, and they're not run. It would need to be explicitly added in the CMakeLists.txt of unittests.

@jhuber6
Contributor

jhuber6 commented Oct 1, 2025

Will this have any testing support and does Intel plan on hosting a bot to test it? Because I'm not aware of anyone who can test these changes locally.

@adurang
Contributor Author

adurang commented Oct 1, 2025

Will this have any testing support and does Intel plan on hosting a bot to test it? Because I'm not aware of anyone who can test these changes locally.

Eventually, yes. But at the current failure rate it would create more noise than anything, I think. It's in our own interest to enable testing, so of course we want to get there when possible.

@jplehr
Contributor

jplehr commented Oct 2, 2025

Regarding testing:

Eventually yes. But at the current failrate it would create more noise than anything I think

You can still set up a buildbot in the staging area. It won't bother anybody, but we could start monitoring it and the progress.

@adurang
Contributor Author

adurang commented Oct 2, 2025

Are there any other comments or issues?

@pbalcer
Contributor

pbalcer commented Oct 9, 2025

I've successfully launched a simple SYCL program with a kernel through liboffload using this L0 plugin. I had to make a few minor changes, but it mostly worked.

The biggest problem is that the current Plugin Interface infrastructure assumes that program binaries are ELF images. This seems OpenMP-specific, and it is not true for the SPIR-V produced by SYCL (nor for PTX, though that was worked around by producing CUBIN instead for the purposes of liboffload). Our existing runtime API simply treats programs as opaque binary blobs and passes them directly to the L0/CUDA/HIP driver.

Given the existing liboffload API for launching a kernel, the metadata contained in the ELF images doesn't seem necessary to me. And if there's anything missing, we can just add the required information to the API calls themselves. To make liboffload more generic, I think it'd be best to move the ELF requirement up a layer so that OpenMP can continue working as is with elf images, and other higher-level languages (like SYCL) can use whatever they want.

Contributor

@pbalcer pbalcer left a comment


L0 lgtm.

@adurang
Contributor Author

adurang commented Oct 9, 2025

@jhuber6 @kevinsala is there anything else needed here? thanks
