Skip to content

Commit 158a592

Browse files
committed
[PROTON][XPU] Remove l0 build-time dependency
Signed-off-by: Anatoly Myachev <[email protected]>
1 parent 1e425cc commit 158a592

File tree

4 files changed

+150
-102
lines changed

4 files changed

+150
-102
lines changed

third_party/intel/backend/driver.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ def __del__(self):
260260
ctypes.windll.kernel32.FreeLibrary(handle)
261261

262262

263-
def compile_module_from_src(src: str, name: str):
263+
def compile_module_from_src(src: str, name: str, include_dirs: list[str] | None = None):
264264
hasher = hashlib.sha256(__CACHE_VERSION.encode("utf-8"))
265265
hasher.update((src + platform_key()).encode("utf-8"))
266266
key = hasher.hexdigest()
@@ -279,8 +279,9 @@ def compile_module_from_src(src: str, name: str):
279279
else:
280280
extra_compiler_args += ["-Wl,-rpath," + dir for dir in COMPILATION_HELPER.libsycl_dir]
281281

282-
so = _build(name, src_path, tmpdir, COMPILATION_HELPER.library_dir, COMPILATION_HELPER.include_dir,
283-
COMPILATION_HELPER.libraries, ccflags=extra_compiler_args)
282+
so = _build(name, src_path, tmpdir, COMPILATION_HELPER.library_dir,
283+
COMPILATION_HELPER.include_dir + (include_dirs or []), COMPILATION_HELPER.libraries,
284+
ccflags=extra_compiler_args)
284285
with open(so, "rb") as f:
285286
cache_path = cache.put(f.read(), f"{name}{suffix}", binary=True)
286287

@@ -290,7 +291,7 @@ def compile_module_from_src(src: str, name: str):
290291
return SpirvUtils(cache_path)
291292
if name == '__triton_launcher':
292293
return TritonLauncher(cache_path)
293-
if name == 'proton_utils':
294+
if name in ['proton_utils', 'xpu_api']:
294295
return cache_path
295296

296297
return _load_module_from_path(name, cache_path)
@@ -946,6 +947,13 @@ def build_proton_help_lib(self):
946947
from triton.backends.intel.driver import compile_module_from_src
947948

948949
dirname = os.path.dirname(os.path.realpath(__file__))
950+
dirname_third_party = os.path.realpath(dirname + "../../..")
951+
include_dir = dirname_third_party + "/proton/csrc/include/"
952+
os.environ["PROTON_XPUAPI_LIB_PATH"] = compile_module_from_src(
953+
src=Path(dirname_third_party + "/proton/csrc/lib/Driver/GPU/XpuApiCompileInRuntime.cpp").read_text(),
954+
name="xpu_api",
955+
include_dirs=[include_dir],
956+
)
949957
return compile_module_from_src(src=Path(dirname).joinpath("proton_utils.cpp").read_text(), name="proton_utils")
950958

951959
def get_active_torch_device(self):

third_party/proton/csrc/include/Driver/GPU/XpuApi.h

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,30 +2,11 @@
22
#define PROTON_DRIVER_GPU_SYCL_H_
33

44
#include "Device.h"
5-
#include <level_zero/ze_api.h>
65

76
namespace proton {
87

98
namespace xpu {
109

11-
template <bool CheckSuccess> ze_result_t init(ze_init_flags_t flags);
12-
13-
template <bool CheckSuccess>
14-
ze_result_t driverGet(uint32_t *pCount, ze_driver_handle_t *phDrivers);
15-
16-
template <bool CheckSuccess>
17-
ze_result_t deviceGet(ze_driver_handle_t hDriver, uint32_t *pCount,
18-
ze_device_handle_t *phDevices);
19-
20-
template <bool CheckSuccess>
21-
ze_result_t deviceGetProperties(ze_device_handle_t hDevice,
22-
ze_device_properties_t *pDeviceProperties);
23-
24-
template <bool CheckSuccess>
25-
ze_result_t
26-
deviceGetMemoryProperties(ze_device_handle_t hDevice, uint32_t *pCount,
27-
ze_device_memory_properties_t *pMemProperties);
28-
2910
Device getDevice(uint64_t index);
3011

3112
} // namespace xpu

third_party/proton/csrc/lib/Driver/GPU/XpuApi.cpp

Lines changed: 28 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -1,95 +1,44 @@
11
#include "Driver/GPU/XpuApi.h"
2-
#include "Driver/Dispatch.h"
32

4-
#include <level_zero/ze_api.h>
3+
#include <dlfcn.h>
4+
#include <iostream>
55
#include <string>
66

77
namespace proton {
88

99
namespace xpu {
1010

11-
struct ExternLibLevelZero : public ExternLibBase {
12-
using RetType = ze_result_t;
11+
typedef void (*GetDeviceFunc)(uint64_t, uint32_t *, uint32_t *, uint32_t *,
12+
uint32_t *, char[256]);
1313

14-
// FIXME: removeme `/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so.1`
15-
static constexpr const char *name = "libze_intel_gpu.so.1";
16-
static constexpr const char *defaultDir = "";
17-
static constexpr RetType success = ZE_RESULT_SUCCESS;
18-
static void *lib;
19-
};
20-
21-
void *ExternLibLevelZero::lib = nullptr;
22-
23-
// https://oneapi-src.github.io/level-zero-spec/level-zero/latest/core/api.html#zeinit
24-
DEFINE_DISPATCH(ExternLibLevelZero, init, zeInit, ze_init_flags_t)
25-
// https://oneapi-src.github.io/level-zero-spec/level-zero/latest/core/api.html#zedriverget
26-
DEFINE_DISPATCH(ExternLibLevelZero, driverGet, zeDriverGet, uint32_t *,
27-
ze_driver_handle_t *)
28-
// https://oneapi-src.github.io/level-zero-spec/level-zero/latest/core/api.html#zedeviceget
29-
DEFINE_DISPATCH(ExternLibLevelZero, deviceGet, zeDeviceGet, ze_driver_handle_t,
30-
uint32_t *, ze_device_handle_t *)
31-
// https://oneapi-src.github.io/level-zero-spec/level-zero/latest/core/api.html#zedevicegetproperties
32-
DEFINE_DISPATCH(ExternLibLevelZero, deviceGetProperties, zeDeviceGetProperties,
33-
ze_device_handle_t, ze_device_properties_t *)
34-
// https://oneapi-src.github.io/level-zero-spec/level-zero/latest/core/api.html#zedevicegetmemoryproperties
35-
DEFINE_DISPATCH(ExternLibLevelZero, deviceGetMemoryProperties,
36-
zeDeviceGetMemoryProperties, ze_device_handle_t, uint32_t *,
37-
ze_device_memory_properties_t *)
38-
39-
// FIXME: for this initialization is needed
40-
// ref: initDevices
41-
// static std::vector<std::pair<sycl::device, ze_device_handle_t>>
42-
// g_sycl_l0_device_list;
43-
44-
// FIXME: rewrite with
45-
// sycl::device.get_info<sycl::ext::intel::info::device::architecture>; cache
46-
// the result
4714
Device getDevice(uint64_t index) {
48-
// ref: getDeviceProperties
49-
50-
// FIXME: double check that initialization is needed
51-
// At the very least, it shouldn't be for every call
52-
xpu::init<true>(ZE_INIT_FLAG_GPU_ONLY);
53-
54-
// FIXME: For now I use the naive approach that the device index from PTI
55-
// record coincides with the default numbering of all devices
56-
57-
uint32_t driverCount = 1;
58-
ze_driver_handle_t driverHandle;
59-
xpu::driverGet<true>(&driverCount, &driverHandle);
60-
uint32_t deviceCount = 1;
61-
62-
// Get device handle
63-
ze_device_handle_t phDevice;
64-
xpu::deviceGet<true>(driverHandle, &deviceCount, &phDevice);
65-
66-
// create a struct to hold device properties
67-
ze_device_properties_t device_properties = {};
68-
device_properties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
69-
xpu::deviceGetProperties<true>(phDevice, &device_properties);
70-
71-
uint32_t clockRate = device_properties.coreClockRate;
72-
uint32_t numSms =
73-
device_properties.numSlices * device_properties.numSubslicesPerSlice;
74-
75-
// create a struct to hold device memory properties
76-
uint32_t memoryCount = 0;
77-
xpu::deviceGetMemoryProperties<true>(phDevice, &memoryCount, nullptr);
78-
auto pMemoryProperties = new ze_device_memory_properties_t[memoryCount];
79-
for (uint32_t mem = 0; mem < memoryCount; ++mem) {
80-
pMemoryProperties[mem].stype = ZE_STRUCTURE_TYPE_DEVICE_MEMORY_PROPERTIES;
81-
pMemoryProperties[mem].pNext = nullptr;
15+
// void *handle = dlopen(utils_cache_path.data(), RTLD_LAZY);
16+
void *handle = dlopen(std::getenv("PROTON_XPUAPI_LIB_PATH"), RTLD_LAZY);
17+
if (!handle) {
18+
const char *dlopen_error = dlerror();
19+
std::cerr << "Failed to load library: " << dlopen_error << std::endl;
20+
throw std::runtime_error(std::string("Failed to load library: ") +
21+
std::string(dlopen_error));
8222
}
83-
xpu::deviceGetMemoryProperties<true>(phDevice, &memoryCount,
84-
pMemoryProperties);
85-
86-
int memoryClockRate = pMemoryProperties[0].maxClockRate;
87-
int busWidth = pMemoryProperties[0].maxBusWidth;
8823

89-
delete[] pMemoryProperties;
24+
dlerror();
25+
GetDeviceFunc getDeviceFromLib = (GetDeviceFunc)dlsym(handle, "getDevice");
26+
const char *dlsym_error = dlerror();
27+
if (dlsym_error) {
28+
std::cerr << "Failed to load function: " << dlsym_error << std::endl;
29+
dlclose(handle);
30+
throw std::runtime_error(std::string("Failed to load function: ") +
31+
std::string(dlsym_error));
32+
}
9033

91-
// FIXME: there should be architecture, but not a name
92-
std::string arch = device_properties.name;
34+
uint32_t clockRate = 0;
35+
uint32_t memoryClockRate = 0;
36+
uint32_t busWidth = 0;
37+
uint32_t numSms = 0;
38+
char arch[256];
39+
getDeviceFromLib(index, &clockRate, &memoryClockRate, &busWidth, &numSms,
40+
arch);
41+
dlclose(handle);
9342

9443
return Device(DeviceType::XPU, index, clockRate, memoryClockRate, busWidth,
9544
numSms, arch);
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
#include "Driver/Dispatch.h"
2+
3+
#include <cstring>
4+
#include <level_zero/ze_api.h>
5+
6+
namespace proton {
7+
8+
namespace xpu {
9+
10+
struct ExternLibLevelZero : public ExternLibBase {
11+
using RetType = ze_result_t;
12+
13+
// FIXME: removeme `/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so.1`
14+
static constexpr const char *name = "libze_intel_gpu.so.1";
15+
static constexpr const char *defaultDir = "";
16+
static constexpr RetType success = ZE_RESULT_SUCCESS;
17+
static void *lib;
18+
};
19+
20+
void *ExternLibLevelZero::lib = nullptr;
21+
22+
// moved here to avoid adding dependency `level_zero/ze_api.h` in `XpuApi.h`
23+
template <bool CheckSuccess> ze_result_t init(ze_init_flags_t flags);
24+
25+
template <bool CheckSuccess>
26+
ze_result_t driverGet(uint32_t *pCount, ze_driver_handle_t *phDrivers);
27+
28+
template <bool CheckSuccess>
29+
ze_result_t deviceGet(ze_driver_handle_t hDriver, uint32_t *pCount,
30+
ze_device_handle_t *phDevices);
31+
32+
template <bool CheckSuccess>
33+
ze_result_t deviceGetProperties(ze_device_handle_t hDevice,
34+
ze_device_properties_t *pDeviceProperties);
35+
36+
template <bool CheckSuccess>
37+
ze_result_t
38+
deviceGetMemoryProperties(ze_device_handle_t hDevice, uint32_t *pCount,
39+
ze_device_memory_properties_t *pMemProperties);
40+
41+
// https://oneapi-src.github.io/level-zero-spec/level-zero/latest/core/api.html#zeinit
42+
DEFINE_DISPATCH(ExternLibLevelZero, init, zeInit, ze_init_flags_t)
43+
// https://oneapi-src.github.io/level-zero-spec/level-zero/latest/core/api.html#zedriverget
44+
DEFINE_DISPATCH(ExternLibLevelZero, driverGet, zeDriverGet, uint32_t *,
45+
ze_driver_handle_t *)
46+
// https://oneapi-src.github.io/level-zero-spec/level-zero/latest/core/api.html#zedeviceget
47+
DEFINE_DISPATCH(ExternLibLevelZero, deviceGet, zeDeviceGet, ze_driver_handle_t,
48+
uint32_t *, ze_device_handle_t *)
49+
// https://oneapi-src.github.io/level-zero-spec/level-zero/latest/core/api.html#zedevicegetproperties
50+
DEFINE_DISPATCH(ExternLibLevelZero, deviceGetProperties, zeDeviceGetProperties,
51+
ze_device_handle_t, ze_device_properties_t *)
52+
// https://oneapi-src.github.io/level-zero-spec/level-zero/latest/core/api.html#zedevicegetmemoryproperties
53+
DEFINE_DISPATCH(ExternLibLevelZero, deviceGetMemoryProperties,
54+
zeDeviceGetMemoryProperties, ze_device_handle_t, uint32_t *,
55+
ze_device_memory_properties_t *)
56+
57+
// FIXME: for this initialization is needed
58+
// ref: initDevices
59+
// static std::vector<std::pair<sycl::device, ze_device_handle_t>>
60+
// g_sycl_l0_device_list;
61+
62+
// FIXME: rewrite with
63+
// sycl::device.get_info<sycl::ext::intel::info::device::architecture>; cache
64+
// the result
65+
extern "C" void getDevice(uint64_t index, uint32_t* clockRate, uint32_t* memoryClockRate, uint32_t* busWidth, uint32_t* numSms, char arch[256]) {
66+
// ref: getDeviceProperties
67+
68+
// FIXME: double check that initialization is needed
69+
// At the very least, it shouldn't be for every call
70+
xpu::init<true>(ZE_INIT_FLAG_GPU_ONLY);
71+
72+
// FIXME: For now I use the naive approach that the device index from PTI
73+
// record coincides with the default numbering of all devices
74+
uint32_t driverCount = 1;
75+
ze_driver_handle_t driverHandle;
76+
xpu::driverGet<true>(&driverCount, &driverHandle);
77+
uint32_t deviceCount = 1;
78+
// Get device handle
79+
ze_device_handle_t phDevice;
80+
xpu::deviceGet<true>(driverHandle, &deviceCount, &phDevice);
81+
// create a struct to hold device properties
82+
ze_device_properties_t device_properties = {};
83+
device_properties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
84+
xpu::deviceGetProperties<true>(phDevice, &device_properties);
85+
*clockRate = device_properties.coreClockRate;
86+
*numSms =
87+
device_properties.numSlices * device_properties.numSubslicesPerSlice;
88+
// create a struct to hold device memory properties
89+
uint32_t memoryCount = 0;
90+
xpu::deviceGetMemoryProperties<true>(phDevice, &memoryCount, nullptr);
91+
auto pMemoryProperties = new ze_device_memory_properties_t[memoryCount];
92+
for (uint32_t mem = 0; mem < memoryCount; ++mem) {
93+
pMemoryProperties[mem].stype = ZE_STRUCTURE_TYPE_DEVICE_MEMORY_PROPERTIES;
94+
pMemoryProperties[mem].pNext = nullptr;
95+
}
96+
xpu::deviceGetMemoryProperties<true>(phDevice, &memoryCount,
97+
pMemoryProperties);
98+
99+
*memoryClockRate = pMemoryProperties[0].maxClockRate;
100+
*busWidth = pMemoryProperties[0].maxBusWidth;
101+
102+
delete[] pMemoryProperties;
103+
104+
// FIXME: there should be architecture, but not a name
105+
memcpy(arch, device_properties.name, 256);
106+
}
107+
108+
} // namespace xpu
109+
110+
} // namespace proton

0 commit comments

Comments
 (0)