Skip to content

Commit abf6629

Browse files
committed
Merge branch 'sycl' into use-device-usm-for-rtl-data
2 parents 47e04d5 + dbd168c commit abf6629

22 files changed

+271
-101
lines changed

scripts/benchmarks/benches/compute.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def setup(self):
2020
if self.built:
2121
return
2222

23-
repo_path = git_clone(self.directory, "compute-benchmarks-repo", "https://github.com/intel/compute-benchmarks.git", "08c41bb8bc1762ad53c6194df6d36bfcceff4aa2")
23+
repo_path = git_clone(self.directory, "compute-benchmarks-repo", "https://github.com/intel/compute-benchmarks.git", "aa6a3b2108bb86202b654ad28129156fa746d41d")
2424
build_path = create_build_path(self.directory, 'compute-benchmarks-build')
2525

2626
configure_command = [
@@ -231,3 +231,26 @@ def bin_args(self) -> list[str]:
231231
"--numberOfElementsY=256",
232232
"--numberOfElementsZ=256",
233233
]
234+
235+
class MemcpyExecute(ComputeBenchmark):
    """Compute benchmark configured with thread/op/allocation-size knobs.

    The constructor records the tunables and then hands the binary name
    ("multithread_benchmark_ur") and test name ("MemcpyExecute") to the
    ComputeBenchmark base class.
    """

    def __init__(self, bench, numOpsPerThread, numThreads, allocSize, iterations):
        # Attributes are assigned before the base-class constructor runs,
        # exactly as in the original ordering.
        (
            self.numOpsPerThread,
            self.numThreads,
            self.allocSize,
            self.iterations,
        ) = (numOpsPerThread, numThreads, allocSize, iterations)
        super().__init__(bench, "multithread_benchmark_ur", "MemcpyExecute")

    def name(self):
        """Return the display name; note that `iterations` is not part of it."""
        knobs = ", ".join(
            [
                f"opsPerThread:{self.numOpsPerThread}",
                f"numThreads:{self.numThreads}",
                f"allocSize:{self.allocSize}",
            ]
        )
        return "multithread_benchmark_ur MemcpyExecute " + knobs

    def bin_args(self) -> list[str]:
        """Return the command-line arguments for the benchmark binary."""
        # Flags that are the same for every instance of this benchmark.
        fixed_flags = [
            "--Ioq=1",
            "--UseEvents=1",
            "--MeasureCompletion=1",
            "--UseQueuePerThread=1",
        ]
        # Flags derived from the constructor arguments.
        tunable_flags = [
            f"--AllocSize={self.allocSize}",
            f"--NumThreads={self.numThreads}",
            f"--NumOpsPerThread={self.numOpsPerThread}",
            f"--iterations={self.iterations}",
        ]
        return fixed_flags + tunable_flags

scripts/benchmarks/main.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,12 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
4444
ExecImmediateCopyQueue(cb, 0, 1, 'Device', 'Device', 1024),
4545
ExecImmediateCopyQueue(cb, 1, 1, 'Device', 'Host', 1024),
4646
VectorSum(cb),
47+
MemcpyExecute(cb, 400, 8, 1024, 100),
48+
MemcpyExecute(cb, 400, 8, 102400, 10),
49+
MemcpyExecute(cb, 500, 8, 102400, 10),
50+
MemcpyExecute(cb, 400, 1, 1024, 1000),
51+
MemcpyExecute(cb, 10, 16, 1024, 1000),
52+
MemcpyExecute(cb, 10, 16, 102400, 100),
4753

4854
# *** Velocity benchmarks
4955
Hashtable(vb),

source/adapters/level_zero/adapter.cpp

Lines changed: 73 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,43 @@ ur_result_t adapterStateInit() {
144144
return UR_RESULT_SUCCESS;
145145
}
146146

147+
/*
148+
This constructor initializes the `ur_adapter_handle_t_` object and
149+
sets up the environment for Level Zero (L0) initialization.
150+
The behavior of the initialization process is influenced by two
151+
environment variables:
152+
`UR_L0_ENABLE_SYSMAN_ENV_DEFAULT` and `UR_L0_ENABLE_ZESINIT_DEFAULT`.
153+
154+
| Environment Variable | Value | Behavior |
155+
|--------------------------------|-------|----------------------------|
156+
| UR_L0_ENABLE_SYSMAN_ENV_DEFAULT| 1 | Enables the default SysMan |
157+
| | | environment initialization |
158+
| | | by setting |
159+
| | | `ZES_ENABLE_SYSMAN` to "1".|
160+
| | 0 | Disables the default SysMan|
161+
| | | environment initialization.|
162+
| | unset | Defaults to 1, enabling the|
163+
| | | SysMan environment |
164+
| | | initialization. |
165+
| UR_L0_ENABLE_ZESINIT_DEFAULT | 1 | Enables the default SysMan |
166+
| | | initialization by loading |
167+
| | | SysMan-related functions |
168+
| | | and calling `zesInit`. |
169+
| | 0 | Disables the default SysMan|
170+
| | | initialization with zesInit|
171+
| | unset | Defaults to 0, disabling |
172+
| | | the SysMan initialization |
173+
| | | thru zesInit. |
174+
175+
Behavior Summary:
176+
- If `UR_L0_ENABLE_SYSMAN_ENV_DEFAULT` is set to 1 or is unset,
177+
`ZES_ENABLE_SYSMAN` is set to "1".
178+
- If `UR_L0_ENABLE_ZESINIT_DEFAULT` is set to 1 and
179+
`UR_L0_ENABLE_SYSMAN_ENV_DEFAULT` is explicitly set to 0,
180+
SysMan-related functions are loaded and `zesInit` is called.
181+
- If `UR_L0_ENABLE_ZESINIT_DEFAULT` is set to 0 or is unset,
182+
SysMan initialization is skipped.
183+
*/
147184
ur_adapter_handle_t_::ur_adapter_handle_t_()
148185
: logger(logger::get_logger("level_zero")) {
149186

@@ -169,6 +206,14 @@ ur_adapter_handle_t_::ur_adapter_handle_t_()
169206
return;
170207
}
171208

209+
// Check if the user has disabled the default L0 Env initialization.
210+
const int UrSysManEnvInitEnabled = [] {
211+
const char *UrRet = std::getenv("UR_L0_ENABLE_SYSMAN_ENV_DEFAULT");
212+
if (!UrRet)
213+
return 1;
214+
return std::atoi(UrRet);
215+
}();
216+
172217
// initialize level zero only once.
173218
if (GlobalAdapter->ZeResult == std::nullopt) {
174219
// Setting these environment variables before running zeInit will enable
@@ -196,6 +241,11 @@ ur_adapter_handle_t_::ur_adapter_handle_t_()
196241
if (UrL0InitAllDrivers) {
197242
L0InitFlags |= ZE_INIT_FLAG_VPU_ONLY;
198243
}
244+
245+
// Set ZES_ENABLE_SYSMAN to "1" unless the user opted out with UR_L0_ENABLE_SYSMAN_ENV_DEFAULT=0.
246+
if (UrSysManEnvInitEnabled) {
247+
setEnvVar("ZES_ENABLE_SYSMAN", "1");
248+
}
199249
logger::debug("\nzeInit with flags value of {}\n",
200250
static_cast<int>(L0InitFlags));
201251
GlobalAdapter->ZeResult = ZE_CALL_NOCHECK(zeInit, (L0InitFlags));
@@ -223,15 +273,29 @@ ur_adapter_handle_t_::ur_adapter_handle_t_()
223273
#else
224274
HMODULE processHandle = nullptr;
225275
#endif
226-
GlobalAdapter->getDeviceByUUIdFunctionPtr =
227-
(zes_pfnDriverGetDeviceByUuidExp_t)ur_loader::LibLoader::getFunctionPtr(
228-
processHandle, "zesDriverGetDeviceByUuidExp");
229-
GlobalAdapter->getSysManDriversFunctionPtr =
230-
(zes_pfnDriverGet_t)ur_loader::LibLoader::getFunctionPtr(
231-
processHandle, "zesDriverGet");
232-
GlobalAdapter->sysManInitFunctionPtr =
233-
(zes_pfnInit_t)ur_loader::LibLoader::getFunctionPtr(processHandle,
234-
"zesInit");
276+
277+
// Check if the user has enabled the default L0 SysMan initialization.
278+
const int UrSysmanZesinitEnable = [] {
279+
const char *UrRet = std::getenv("UR_L0_ENABLE_ZESINIT_DEFAULT");
280+
if (!UrRet)
281+
return 0;
282+
return std::atoi(UrRet);
283+
}();
284+
285+
// Enable zesInit by default only if ZES_ENABLE_SYSMAN has not been set by
286+
// default and UrSysmanZesinitEnable is true.
287+
if (UrSysmanZesinitEnable && !UrSysManEnvInitEnabled) {
288+
GlobalAdapter->getDeviceByUUIdFunctionPtr =
289+
(zes_pfnDriverGetDeviceByUuidExp_t)
290+
ur_loader::LibLoader::getFunctionPtr(
291+
processHandle, "zesDriverGetDeviceByUuidExp");
292+
GlobalAdapter->getSysManDriversFunctionPtr =
293+
(zes_pfnDriverGet_t)ur_loader::LibLoader::getFunctionPtr(
294+
processHandle, "zesDriverGet");
295+
GlobalAdapter->sysManInitFunctionPtr =
296+
(zes_pfnInit_t)ur_loader::LibLoader::getFunctionPtr(processHandle,
297+
"zesInit");
298+
}
235299
if (GlobalAdapter->getDeviceByUUIdFunctionPtr &&
236300
GlobalAdapter->getSysManDriversFunctionPtr &&
237301
GlobalAdapter->sysManInitFunctionPtr) {

source/adapters/level_zero/kernel.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,7 @@ ur_result_t urKernelGetInfo(
746746
char *attributes = new char[Size];
747747
ZE2UR_CALL(zeKernelGetSourceAttributes,
748748
(Kernel->ZeKernel, &Size, &attributes));
749-
auto Res = ReturnValue(attributes);
749+
auto Res = ReturnValue(static_cast<const char *>(attributes));
750750
delete[] attributes;
751751
return Res;
752752
} catch (const std::bad_alloc &) {

source/adapters/opencl/adapter.cpp

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,49 @@
88
//
99
//===----------------------------------------------------------------------===//
1010

11+
#include "adapter.hpp"
1112
#include "common.hpp"
12-
#include "logger/ur_logger.hpp"
13+
#include "ur/ur.hpp"
14+
15+
#ifdef _MSC_VER
16+
#include <Windows.h>
17+
#else
18+
#include <dlfcn.h>
19+
#endif
20+
21+
/// Resolves the core OpenCL entry points listed in core_functions.def by name
/// from the OpenCL ICD loader library and stores them in the matching member
/// function pointers. A symbol the loader does not export resolves to null,
/// so callers must check a pointer before calling through it.
ur_adapter_handle_t_::ur_adapter_handle_t_() {
#ifdef _MSC_VER
  // Loading OpenCL.dll increments the library's internal reference count.
  auto handle = LoadLibraryA("OpenCL.dll");

  // #FUNC stringizes the macro argument so each entry point is looked up by
  // its own name (a quoted "FUNC" would look up a symbol literally named
  // FUNC for every entry).
#define CL_CORE_FUNCTION(FUNC)                                                 \
  FUNC = reinterpret_cast<decltype(::FUNC) *>(GetProcAddress(handle, #FUNC));
#include "core_functions.def"
#undef CL_CORE_FUNCTION

  // So we can safely decrement it here without actually unloading OpenCL.dll.
  FreeLibrary(handle);
#else
  // Loading libOpenCL.so to get the library handle but don't dlclose it as
  // this causes a segfault when attempting to call any OpenCL entry point.
  // POSIX requires the mode to include either RTLD_LAZY or RTLD_NOW;
  // RTLD_LOCAL on its own is not a valid mode.
  auto handle = dlopen("libOpenCL.so", RTLD_LAZY | RTLD_LOCAL);

#define CL_CORE_FUNCTION(FUNC)                                                 \
  FUNC = reinterpret_cast<decltype(::FUNC) *>(dlsym(handle, #FUNC));
#include "core_functions.def"
#undef CL_CORE_FUNCTION

#endif
}
1345

14-
struct ur_adapter_handle_t_ {
15-
std::atomic<uint32_t> RefCount = 0;
16-
std::mutex Mutex;
17-
logger::Logger &log = logger::get_logger("opencl");
18-
};
46+
static ur_adapter_handle_t adapter = nullptr;
1947

20-
static ur_adapter_handle_t_ *adapter = nullptr;
48+
/// Returns the file-local OpenCL adapter handle.
///
/// Calls die() with a diagnostic when the handle is still null, i.e. the
/// adapter is requested before it has been created or after it has been
/// torn down.
ur_adapter_handle_t ur::cl::getAdapter() {
  if (!adapter) {
    die("OpenCL adapter used before initialization or after destruction");
  }
  return adapter;
}
2154

2255
static void globalAdapterShutdown() {
2356
if (cl_ext::ExtFuncPtrCache) {

source/adapters/opencl/adapter.hpp

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,26 @@
88
//
99
//===----------------------------------------------------------------------===//
1010

11-
struct ur_adapter_handle_t_;
11+
#include "CL/cl.h"
12+
#include "logger/ur_logger.hpp"
1213

13-
extern ur_adapter_handle_t_ adapter;
14+
// State held by the OpenCL adapter handle: a reference count, a mutex, the
// adapter's logger, and one function pointer per core OpenCL entry point
// listed in core_functions.def.
struct ur_adapter_handle_t_ {
  ur_adapter_handle_t_();

  // Reference count, starting at 0. NOTE(review): presumably adjusted by the
  // adapter retain/release entry points - confirm in adapter.cpp.
  std::atomic<uint32_t> RefCount = 0;
  // NOTE(review): what this mutex guards is not visible in this header.
  std::mutex Mutex;
  // Logger obtained under the name "opencl".
  logger::Logger &log = logger::get_logger("opencl");

  // Function pointers to core OpenCL entry points which may not exist in older
  // versions of the OpenCL-ICD-Loader are tracked here and initialized by
  // dynamically loading the symbol by name.
  // Each CL_CORE_FUNCTION(X) entry in core_functions.def expands to a member
  // `decltype(::X) *X = nullptr;`, so a pointer stays null until it is assigned.
#define CL_CORE_FUNCTION(FUNC) decltype(::FUNC) *FUNC = nullptr;
#include "core_functions.def"
#undef CL_CORE_FUNCTION
};
28+
29+
namespace ur {
30+
namespace cl {
31+
ur_adapter_handle_t getAdapter();
32+
} // namespace cl
33+
} // namespace ur

source/adapters/opencl/common.hpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -239,10 +239,6 @@ cl_int(CL_API_CALL *)(cl_command_queue, cl_program, const char *, cl_bool,
239239
size_t, size_t, void *, cl_uint, const cl_event *,
240240
cl_event *);
241241

242-
using clSetProgramSpecializationConstant_fn = CL_API_ENTRY
243-
cl_int(CL_API_CALL *)(cl_program program, cl_uint spec_id, size_t spec_size,
244-
const void *spec_value);
245-
246242
using clEnqueueReadHostPipeINTEL_fn = CL_API_ENTRY
247243
cl_int(CL_API_CALL *)(cl_command_queue queue, cl_program program,
248244
const char *pipe_symbol, cl_bool blocking, void *ptr,

source/adapters/opencl/context.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
//===----------------------------------------------------------------------===//
1010

1111
#include "context.hpp"
12+
#include "adapter.hpp"
1213

1314
#include <mutex>
1415
#include <set>
@@ -169,6 +170,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle(
169170
UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter(
170171
ur_context_handle_t hContext, ur_context_extended_deleter_t pfnDeleter,
171172
void *pUserData) {
173+
if (!ur::cl::getAdapter()->clSetContextDestructorCallback) {
174+
ur::cl::getAdapter()->log.warning(
175+
"clSetContextDestructorCallback not found, consider upgrading the "
176+
"OpenCL-ICD-Loader to the latest version.");
177+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
178+
}
179+
172180
static std::unordered_map<ur_context_handle_t,
173181
std::set<ur_context_extended_deleter_t>>
174182
ContextCallbackMap;
@@ -212,7 +220,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter(
212220
auto *C = static_cast<ContextCallback *>(pUserData);
213221
C->execute();
214222
};
215-
CL_RETURN_ON_FAILURE(clSetContextDestructorCallback(
223+
CL_RETURN_ON_FAILURE(ur::cl::getAdapter()->clSetContextDestructorCallback(
216224
cl_adapter::cast<cl_context>(hContext), ClCallback, Callback));
217225

218226
return UR_RESULT_SUCCESS;
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// Introduced in OpenCL 2.0
2+
// TODO: clCreateCommandQueueWithProperties
3+
4+
// Introduced in OpenCL 2.1
5+
// TODO: clGetKernelSubGroupInfo
6+
// TODO: clCreateProgramWithIL
7+
// TODO: clGetHostTimer
8+
// TODO: clGetDeviceAndHostTimer
9+
10+
// Introduced in OpenCL 2.2
11+
CL_CORE_FUNCTION(clSetProgramSpecializationConstant)
12+
13+
// Introduced in OpenCL 3.0
14+
CL_CORE_FUNCTION(clSetContextDestructorCallback)

source/adapters/opencl/extension_functions.def

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ CL_EXTENSION_FUNC(clEnqueueWriteGlobalVariable)
1313
CL_EXTENSION_FUNC(clEnqueueReadGlobalVariable)
1414
CL_EXTENSION_FUNC(clEnqueueReadHostPipeINTEL)
1515
CL_EXTENSION_FUNC(clEnqueueWriteHostPipeINTEL)
16-
CL_EXTENSION_FUNC(clSetProgramSpecializationConstant)
1716
CL_EXTENSION_FUNC(clCreateCommandBufferKHR)
1817
CL_EXTENSION_FUNC(clRetainCommandBufferKHR)
1918
CL_EXTENSION_FUNC(clReleaseCommandBufferKHR)

0 commit comments

Comments
 (0)