From 07bc2b3589fae51347f587d06659a7f87ee2c03c Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Mon, 21 Jul 2025 09:47:04 -0500 Subject: [PATCH 1/5] Try the release 2025.2.0 of oneAPI --- deps/build_local.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/build_local.jl b/deps/build_local.jl index 52560f47..c8ae4e33 100644 --- a/deps/build_local.jl +++ b/deps/build_local.jl @@ -45,7 +45,7 @@ if !isfile(joinpath(conda_dir, "condarc-julia.yml")) touch(joinpath(conda_dir, "conda-meta", "history")) end Conda.add_channel("https://software.repos.intel.com/python/conda/", conda_dir) -Conda.add(["dpcpp_linux-64=2025.1.0", "mkl-devel-dpcpp=2025.1.0"], conda_dir) +Conda.add(["dpcpp_linux-64=2025.2.0", "mkl-devel-dpcpp=2025.2.0"], conda_dir) Conda.list(conda_dir) From 37cd05d1978ff7ed2311707462fa52394c516464 Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Wed, 30 Jul 2025 02:17:46 -0500 Subject: [PATCH 2/5] Update oneAPI.jl for the release 2025.2.0 --- .gitignore | 6 ++++++ deps/Project.toml | 2 +- deps/generate_helpers.jl | 2 +- deps/generate_interfaces.jl | 1 + deps/src/onemkl.cpp | 10 ++++++++++ deps/src/onemkl.h | 7 +++++++ lib/mkl/wrappers_sparse.jl | 12 ++++++++++++ test/Project.toml | 1 + test/onemkl.jl | 1 + 9 files changed, 40 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index c22bac86..736148be 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,8 @@ LocalPreferences.toml Manifest.toml +deps/onemkl_blas.cpp +deps/onemkl_blas.h +deps/onemkl_lapack.cpp +deps/onemkl_lapack.h +deps/onemkl_sparse.cpp +deps/onemkl_sparse.h diff --git a/deps/Project.toml b/deps/Project.toml index b329510b..b8446dbe 100644 --- a/deps/Project.toml +++ b/deps/Project.toml @@ -12,4 +12,4 @@ oneAPI_Level_Zero_Headers_jll = "f4bc562b-d309-54f8-9efb-476e56f0410d" oneAPI_Support_Headers_jll = "24f86df5-245d-5634-a4cc-32433d9800b3" [compat] -oneAPI_Support_Headers_jll = "=2025.1.0" +oneAPI_Support_Headers_jll = "=2025.2.0" diff --git a/deps/generate_helpers.jl b/deps/generate_helpers.jl index f17d6786..01f162d9 100644 --- a/deps/generate_helpers.jl +++ b/deps/generate_helpers.jl @@ -1,6 +1,6 @@ non_parametric_routines = ["init_matrix_handle", "release_matrix_handle", "set_matrix_property", "init_matmat_descr", "release_matmat_descr", "set_matmat_data", "get_matmat_data", "matmat", -"omatcopy", "sort_matrix", "optimize_gemv", "optimize_trmv", "optimize_trsv", "optimize_trsm", +"omatcopy", "sort_matrix", "optimize_gemv", "optimize_gemm", "optimize_trmv", "optimize_trsv", "optimize_trsm", "init_omatconvert_descr", "release_omatconvert_descr", "init_omatadd_descr", "release_omatadd_descr", "omatconvert_buffer_size", "omatconvert_analyze", "omatconvert_get_nnz", "omatconvert", "omatadd_buffer_size", "omatadd_analyze", "omatadd_get_nnz"] diff --git a/deps/generate_interfaces.jl b/deps/generate_interfaces.jl index 50378398..a36fd0a8 100644 --- a/deps/generate_interfaces.jl +++ b/deps/generate_interfaces.jl @@ -241,6 +241,7 @@ function generate_headers(library::String, filename::Vector{String}, output::Str # The routine "optimize_trsm" has two versions. suffix = "" (name_routine == "optimize_trsm") && occursin("columns", header) && (suffix = "_advanced") + (name_routine == "optimize_gemm") && occursin("columns", header) && (suffix = "_advanced") name_routine ∈ ("set_csr_data", "set_coo_data") && occursin("int64_t", header) && (suffix = "_64") occursin("batch", name_routine) && !occursin("**", header) && (suffix = "_strided") diff --git a/deps/src/onemkl.cpp b/deps/src/onemkl.cpp index f10a3f57..de667728 100644 --- a/deps/src/onemkl.cpp +++ b/deps/src/onemkl.cpp @@ -4038,6 +4038,16 @@ extern "C" int onemklXsparse_optimize_trsv(syclQueue_t device_queue, onemklUplo return 0; } +extern "C" int onemklXsparse_optimize_gemm(syclQueue_t device_queue, onemklTranspose opA, matrix_handle_t A) { + auto status = oneapi::mkl::sparse::optimize_gemm(device_queue->val, convert(opA), (oneapi::mkl::sparse::matrix_handle_t) A, {}); + return 0; +} + +extern "C" int onemklXsparse_optimize_gemm_advanced(syclQueue_t device_queue, onemklLayout layout_val, onemklTranspose opA, onemklTranspose opB, matrix_handle_t A, int64_t columns) { + auto status = oneapi::mkl::sparse::optimize_gemm(device_queue->val, convert(layout_val), convert(opA), convert(opB), (oneapi::mkl::sparse::matrix_handle_t) A, columns, {}); + return 0; +} + extern "C" int onemklXsparse_optimize_trsm(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, matrix_handle_t A) { auto status = oneapi::mkl::sparse::optimize_trsm(device_queue->val, convert(uplo_val), convert(opA), convert(diag_val), (oneapi::mkl::sparse::matrix_handle_t) A, {}); return 0; diff --git a/deps/src/onemkl.h b/deps/src/onemkl.h index cf0d3891..d6a448b9 100644 --- a/deps/src/onemkl.h +++ b/deps/src/onemkl.h @@ -2842,6 +2842,13 @@ int onemklXsparse_optimize_trmv(syclQueue_t device_queue, onemklUplo uplo_val, o int onemklXsparse_optimize_trsv(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, matrix_handle_t A); +int onemklXsparse_optimize_gemm(syclQueue_t device_queue, onemklTranspose opA, matrix_handle_t + A); + +int onemklXsparse_optimize_gemm_advanced(syclQueue_t device_queue, onemklLayout layout_val, + onemklTranspose opA, onemklTranspose opB, + matrix_handle_t A, int64_t columns); + int onemklXsparse_optimize_trsm(syclQueue_t device_queue, onemklUplo uplo_val, onemklTranspose opA, onemklDiag diag_val, matrix_handle_t A); diff --git a/lib/mkl/wrappers_sparse.jl b/lib/mkl/wrappers_sparse.jl index e554330c..f39cbd03 100644 --- a/lib/mkl/wrappers_sparse.jl +++ b/lib/mkl/wrappers_sparse.jl @@ -127,6 +127,18 @@ for (fname, elty) in ((:onemklSsparse_gemm, :Float32), end end +function sparse_optimize_gemm!(trans::Char, A::oneSparseMatrixCSR) + queue = global_queue(context(A.nzVal), device(A.nzVal)) + onemklXsparse_optimize_gemm(sycl_queue(queue), trans, A.handle) + return A +end + +function sparse_optimize_gemm!(trans::Char, transB::Char, nrhs::Int, A::oneSparseMatrixCSR) + queue = global_queue(context(A.nzVal), device(A.nzVal)) + onemklXsparse_optimize_gemm_advanced(sycl_queue(queue), 'C', trans, transB, A.handle, nrhs) + return A +end + for (fname, elty) in ((:onemklSsparse_symv, :Float32), (:onemklDsparse_symv, :Float64), (:onemklCsparse_symv, :ComplexF32), diff --git a/test/Project.toml b/test/Project.toml index 62cdf0f8..a49bf765 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -18,4 +18,5 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" libigc_jll = "94295238-5935-5bd7-bb0f-b00942e9bdd5" +oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b" oneAPI_Support_jll = "b049733a-a71d-5ed3-8eba-7d323ac00b36" diff --git a/test/onemkl.jl b/test/onemkl.jl index 50651584..e5b6541c 100644 --- a/test/onemkl.jl +++ b/test/onemkl.jl @@ -1132,6 +1132,7 @@ end alpha = rand(T) beta = rand(T) + oneMKL.sparse_optimize_gemm!(transa, dA) oneMKL.sparse_gemm!(transa, transb, alpha, dA, dB, beta, dC) @test alpha * opa(A) * opb(B) + beta * C ≈ collect(dC) end From ef1ce6db9e49314c67c9d4989f52523b3f7dc13a Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Fri, 8 Aug 2025 11:33:20 -0500 Subject: [PATCH 3/5] Update liboneapi_support.jl --- lib/support/liboneapi_support.jl | 52 ++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/lib/support/liboneapi_support.jl b/lib/support/liboneapi_support.jl index 90d2ed1c..9b5858f3 100644 --- a/lib/support/liboneapi_support.jl +++ b/lib/support/liboneapi_support.jl @@ -180,6 +180,14 @@ end ONEMKL_MATMAT_REQUEST_FINALIZE = 8 end +@cenum onemklOmatconvertAlg::UInt32 begin + ONEMKL_OMATCONVERT_DEFAULT_ALG = 0 +end + +@cenum onemklOmataddAlg::UInt32 begin + ONEMKL_OMATADD_DEFAULT_ALG = 0 +end + mutable struct matrix_handle end const matrix_handle_t = Ptr{matrix_handle} @@ -188,6 +196,14 @@ mutable struct matmat_descr end const matmat_descr_t = Ptr{matmat_descr} +mutable struct omatconvert_descr end + +const omatconvert_descr_t = Ptr{omatconvert_descr} + +mutable struct omatadd_descr end + +const omatadd_descr_t = Ptr{omatadd_descr} + function onemklHgemm_batch(device_queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size) @ccall liboneapi_support.onemklHgemm_batch(device_queue::syclQueue_t, @@ -6596,6 +6612,26 @@ function onemklXsparse_release_matmat_descr(p_desc) @ccall liboneapi_support.onemklXsparse_release_matmat_descr(p_desc::Ptr{matmat_descr_t})::Cint end +function onemklXsparse_init_omatconvert_descr(device_queue, p_descr) + @ccall liboneapi_support.onemklXsparse_init_omatconvert_descr(device_queue::syclQueue_t, + p_descr::Ptr{omatconvert_descr_t})::Cint +end + +function onemklXsparse_release_omatconvert_descr(device_queue, descr) + @ccall liboneapi_support.onemklXsparse_release_omatconvert_descr(device_queue::syclQueue_t, + descr::omatconvert_descr_t)::Cint +end + +function onemklXsparse_init_omatadd_descr(device_queue, p_omatadd_desc) + @ccall liboneapi_support.onemklXsparse_init_omatadd_descr(device_queue::syclQueue_t, + p_omatadd_desc::Ptr{omatadd_descr_t})::Cint +end + +function onemklXsparse_release_omatadd_descr(device_queue, omatadd_desc) + @ccall liboneapi_support.onemklXsparse_release_omatadd_descr(device_queue::syclQueue_t, + omatadd_desc::omatadd_descr_t)::Cint +end + function onemklXsparse_omatcopy(device_queue, transpose_val, spMat_in, spMat_out) @ccall liboneapi_support.onemklXsparse_omatcopy(device_queue::syclQueue_t, transpose_val::onemklTranspose, @@ -6658,6 +6694,22 @@ function onemklXsparse_optimize_trsv(device_queue, uplo_val, opA, diag_val, A) A::matrix_handle_t)::Cint end +function onemklXsparse_optimize_gemm(device_queue, opA, A) + @ccall liboneapi_support.onemklXsparse_optimize_gemm(device_queue::syclQueue_t, + opA::onemklTranspose, + A::matrix_handle_t)::Cint +end + +function onemklXsparse_optimize_gemm_advanced(device_queue, layout_val, opA, opB, A, + columns) + @ccall liboneapi_support.onemklXsparse_optimize_gemm_advanced(device_queue::syclQueue_t, + layout_val::onemklLayout, + opA::onemklTranspose, + opB::onemklTranspose, + A::matrix_handle_t, + columns::Int64)::Cint +end + function onemklXsparse_optimize_trsm(device_queue, uplo_val, opA, diag_val, A) @ccall liboneapi_support.onemklXsparse_optimize_trsm(device_queue::syclQueue_t, uplo_val::onemklUplo, From c090fb97a0eec96cb7bedb23330abfc164ddce78 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Fri, 8 Aug 2025 11:34:39 -0500 Subject: [PATCH 4/5] Update libze.jl --- lib/level-zero/libze.jl | 712 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 708 insertions(+), 4 deletions(-) diff --git a/lib/level-zero/libze.jl b/lib/level-zero/libze.jl index 9a22e93e..d8ad0b83 100644 --- a/lib/level-zero/libze.jl +++ b/lib/level-zero/libze.jl @@ -11,7 +11,7 @@ end function check(f) res = retry_reclaim(err -> err == RESULT_ERROR_OUT_OF_HOST_MEMORY || - err == RESULT_ERROR_OUT_OF_DEVICE_MEMORY) do + err == RESULT_ERROR_OUT_OF_DEVICE_MEMORY) do return f() end @@ -147,6 +147,11 @@ const ze_ipc_event_pool_handle_t = _ze_ipc_event_pool_handle_t ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE = 2013265945 ZE_RESULT_ERROR_OVERLAPPING_REGIONS = 2013265946 ZE_RESULT_WARNING_ACTION_REQUIRED = 2013265947 + ZE_RESULT_ERROR_INVALID_KERNEL_HANDLE = 2013265948 + ZE_RESULT_EXT_RTAS_BUILD_RETRY = 2013265949 + ZE_RESULT_EXT_RTAS_BUILD_DEFERRED = 2013265950 + ZE_RESULT_EXT_ERROR_OPERANDS_INCOMPATIBLE = 2013265951 + ZE_RESULT_ERROR_SURVIVABILITY_MODE_DETECTED = 2013265952 ZE_RESULT_ERROR_UNKNOWN = 2147483646 ZE_RESULT_FORCE_UINT32 = 2147483647 end @@ -239,6 +244,22 @@ const ze_result_t = _ze_result_t ZE_STRUCTURE_TYPE_PITCHED_ALLOC_DEVICE_EXP_PROPERTIES = 131101 ZE_STRUCTURE_TYPE_BINDLESS_IMAGE_EXP_DESC = 131102 ZE_STRUCTURE_TYPE_PITCHED_IMAGE_EXP_DESC = 131103 + ZE_STRUCTURE_TYPE_MUTABLE_GRAPH_ARGUMENT_EXP_DESC = 131104 + ZE_STRUCTURE_TYPE_INIT_DRIVER_TYPE_DESC = 131105 + ZE_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_EXT_DESC = 131106 + ZE_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_WIN32_EXT_DESC = 131107 + ZE_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_FD_EXT_DESC = 131108 + ZE_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_EXT = 131109 + ZE_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_WAIT_PARAMS_EXT = 131110 + ZE_STRUCTURE_TYPE_DRIVER_DDI_HANDLES_EXT_PROPERTIES = 131111 + ZE_STRUCTURE_TYPE_DEVICE_CACHELINE_SIZE_EXT = 131112 + ZE_STRUCTURE_TYPE_DEVICE_VECTOR_WIDTH_PROPERTIES_EXT = 131113 + ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXT_DESC = 131120 + ZE_STRUCTURE_TYPE_RTAS_BUILDER_BUILD_OP_EXT_DESC = 131121 + ZE_STRUCTURE_TYPE_RTAS_BUILDER_EXT_PROPERTIES = 131122 + ZE_STRUCTURE_TYPE_RTAS_PARALLEL_OPERATION_EXT_PROPERTIES = 131123 + ZE_STRUCTURE_TYPE_RTAS_DEVICE_EXT_PROPERTIES = 131124 + ZE_STRUCTURE_TYPE_RTAS_GEOMETRY_AABBS_EXT_CB_PARAMS = 131125 ZE_STRUCTURE_TYPE_FORCE_UINT32 = 2147483647 end @@ -306,6 +327,16 @@ end const ze_base_desc_t = _ze_base_desc_t +const ze_init_driver_type_flags_t = UInt32 + +struct _ze_init_driver_type_desc_t + stype::ze_structure_type_t + pNext::Ptr{Cvoid} + flags::ze_init_driver_type_flags_t +end + +const ze_init_driver_type_desc_t = _ze_init_driver_type_desc_t + struct _ze_driver_uuid_t id::NTuple{16,UInt8} end @@ -687,6 +718,9 @@ const ze_fence_desc_t = _ze_fence_desc_t ZE_IMAGE_FORMAT_LAYOUT_444P = 40 ZE_IMAGE_FORMAT_LAYOUT_RGBP = 41 ZE_IMAGE_FORMAT_LAYOUT_BRGP = 42 + ZE_IMAGE_FORMAT_LAYOUT_8_8_8 = 43 + ZE_IMAGE_FORMAT_LAYOUT_16_16_16 = 44 + ZE_IMAGE_FORMAT_LAYOUT_32_32_32 = 45 ZE_IMAGE_FORMAT_LAYOUT_FORCE_UINT32 = 2147483647 end @@ -1038,6 +1072,345 @@ end const ze_relaxed_allocation_limits_exp_desc_t = _ze_relaxed_allocation_limits_exp_desc_t +const ze_driver_ddi_handle_ext_flags_t = UInt32 + +struct _ze_driver_ddi_handles_ext_properties_t + stype::ze_structure_type_t + pNext::Ptr{Cvoid} + flags::ze_driver_ddi_handle_ext_flags_t +end + +const ze_driver_ddi_handles_ext_properties_t = _ze_driver_ddi_handles_ext_properties_t + +const ze_external_semaphore_ext_flags_t = UInt32 + +struct _ze_external_semaphore_ext_desc_t + stype::ze_structure_type_t + pNext::Ptr{Cvoid} + flags::ze_external_semaphore_ext_flags_t +end + +const ze_external_semaphore_ext_desc_t = _ze_external_semaphore_ext_desc_t + +struct _ze_external_semaphore_win32_ext_desc_t + stype::ze_structure_type_t + pNext::Ptr{Cvoid} + handle::Ptr{Cvoid} + name::Ptr{Cchar} +end + +const ze_external_semaphore_win32_ext_desc_t = _ze_external_semaphore_win32_ext_desc_t + +struct _ze_external_semaphore_fd_ext_desc_t + stype::ze_structure_type_t + pNext::Ptr{Cvoid} + fd::Cint +end + +const ze_external_semaphore_fd_ext_desc_t = _ze_external_semaphore_fd_ext_desc_t + +struct _ze_external_semaphore_signal_params_ext_t + stype::ze_structure_type_t + pNext::Ptr{Cvoid} + value::UInt64 +end + +const ze_external_semaphore_signal_params_ext_t = _ze_external_semaphore_signal_params_ext_t + +struct _ze_external_semaphore_wait_params_ext_t + stype::ze_structure_type_t + pNext::Ptr{Cvoid} + value::UInt64 +end + +const ze_external_semaphore_wait_params_ext_t = _ze_external_semaphore_wait_params_ext_t + +struct _ze_device_cache_line_size_ext_t + stype::ze_structure_type_t + pNext::Ptr{Cvoid} + cacheLineSize::Csize_t +end + +const ze_device_cache_line_size_ext_t = _ze_device_cache_line_size_ext_t + +@cenum _ze_rtas_builder_ext_version_t::UInt32 begin + ZE_RTAS_BUILDER_EXT_VERSION_1_0 = 65536 + ZE_RTAS_BUILDER_EXT_VERSION_CURRENT = 65536 + ZE_RTAS_BUILDER_EXT_VERSION_FORCE_UINT32 = 2147483647 +end + +const ze_rtas_builder_ext_version_t = _ze_rtas_builder_ext_version_t + +struct _ze_rtas_builder_ext_desc_t + stype::ze_structure_type_t + pNext::Ptr{Cvoid} + builderVersion::ze_rtas_builder_ext_version_t +end + +const ze_rtas_builder_ext_desc_t = _ze_rtas_builder_ext_desc_t + +const ze_rtas_builder_ext_flags_t = UInt32 + +struct _ze_rtas_builder_ext_properties_t + stype::ze_structure_type_t + pNext::Ptr{Cvoid} + flags::ze_rtas_builder_ext_flags_t + rtasBufferSizeBytesExpected::Csize_t + rtasBufferSizeBytesMaxRequired::Csize_t + scratchBufferSizeBytes::Csize_t +end + +const ze_rtas_builder_ext_properties_t = _ze_rtas_builder_ext_properties_t + +const ze_rtas_parallel_operation_ext_flags_t = UInt32 + +struct _ze_rtas_parallel_operation_ext_properties_t + stype::ze_structure_type_t + pNext::Ptr{Cvoid} + flags::ze_rtas_parallel_operation_ext_flags_t + maxConcurrency::UInt32 +end + +const ze_rtas_parallel_operation_ext_properties_t = _ze_rtas_parallel_operation_ext_properties_t + +const ze_rtas_device_ext_flags_t = UInt32 + +@cenum _ze_rtas_format_ext_t::UInt32 begin + ZE_RTAS_FORMAT_EXT_INVALID = 0 + ZE_RTAS_FORMAT_EXT_MAX = 2147483646 + ZE_RTAS_FORMAT_EXT_FORCE_UINT32 = 2147483647 +end + +const ze_rtas_format_ext_t = _ze_rtas_format_ext_t + +struct _ze_rtas_device_ext_properties_t + stype::ze_structure_type_t + pNext::Ptr{Cvoid} + flags::ze_rtas_device_ext_flags_t + rtasFormat::ze_rtas_format_ext_t + rtasBufferAlignment::UInt32 +end + +const ze_rtas_device_ext_properties_t = _ze_rtas_device_ext_properties_t + +struct _ze_rtas_float3_ext_t + x::Cfloat + y::Cfloat + z::Cfloat +end + +const ze_rtas_float3_ext_t = _ze_rtas_float3_ext_t + +struct _ze_rtas_transform_float3x4_column_major_ext_t + vx_x::Cfloat + vx_y::Cfloat + vx_z::Cfloat + vy_x::Cfloat + vy_y::Cfloat + vy_z::Cfloat + vz_x::Cfloat + vz_y::Cfloat + vz_z::Cfloat + p_x::Cfloat + p_y::Cfloat + p_z::Cfloat +end + +const ze_rtas_transform_float3x4_column_major_ext_t = _ze_rtas_transform_float3x4_column_major_ext_t + +struct _ze_rtas_transform_float3x4_aligned_column_major_ext_t + vx_x::Cfloat + vx_y::Cfloat + vx_z::Cfloat + pad0::Cfloat + vy_x::Cfloat + vy_y::Cfloat + vy_z::Cfloat + pad1::Cfloat + vz_x::Cfloat + vz_y::Cfloat + vz_z::Cfloat + pad2::Cfloat + p_x::Cfloat + p_y::Cfloat + p_z::Cfloat + pad3::Cfloat +end + +const ze_rtas_transform_float3x4_aligned_column_major_ext_t = _ze_rtas_transform_float3x4_aligned_column_major_ext_t + +struct _ze_rtas_transform_float3x4_row_major_ext_t + vx_x::Cfloat + vy_x::Cfloat + vz_x::Cfloat + p_x::Cfloat + vx_y::Cfloat + vy_y::Cfloat + vz_y::Cfloat + p_y::Cfloat + vx_z::Cfloat + vy_z::Cfloat + vz_z::Cfloat + p_z::Cfloat +end + +const ze_rtas_transform_float3x4_row_major_ext_t = _ze_rtas_transform_float3x4_row_major_ext_t + +struct _ze_rtas_aabb_ext_t + lower::ze_rtas_float3_ext_t + upper::ze_rtas_float3_ext_t +end + +const ze_rtas_aabb_ext_t = _ze_rtas_aabb_ext_t + +struct _ze_rtas_triangle_indices_uint32_ext_t + v0::UInt32 + v1::UInt32 + v2::UInt32 +end + +const ze_rtas_triangle_indices_uint32_ext_t = _ze_rtas_triangle_indices_uint32_ext_t + +struct _ze_rtas_quad_indices_uint32_ext_t + v0::UInt32 + v1::UInt32 + v2::UInt32 + v3::UInt32 +end + +const ze_rtas_quad_indices_uint32_ext_t = _ze_rtas_quad_indices_uint32_ext_t + +const ze_rtas_builder_packed_geometry_type_ext_t = UInt8 + +struct _ze_rtas_builder_geometry_info_ext_t + geometryType::ze_rtas_builder_packed_geometry_type_ext_t +end + +const ze_rtas_builder_geometry_info_ext_t = _ze_rtas_builder_geometry_info_ext_t + +const ze_rtas_builder_packed_geometry_ext_flags_t = UInt8 + +const ze_rtas_builder_packed_input_data_format_ext_t = UInt8 + +struct _ze_rtas_builder_triangles_geometry_info_ext_t + geometryType::ze_rtas_builder_packed_geometry_type_ext_t + geometryFlags::ze_rtas_builder_packed_geometry_ext_flags_t + geometryMask::UInt8 + triangleFormat::ze_rtas_builder_packed_input_data_format_ext_t + vertexFormat::ze_rtas_builder_packed_input_data_format_ext_t + triangleCount::UInt32 + vertexCount::UInt32 + triangleStride::UInt32 + vertexStride::UInt32 + pTriangleBuffer::Ptr{Cvoid} + pVertexBuffer::Ptr{Cvoid} +end + +const ze_rtas_builder_triangles_geometry_info_ext_t = _ze_rtas_builder_triangles_geometry_info_ext_t + +struct _ze_rtas_builder_quads_geometry_info_ext_t + geometryType::ze_rtas_builder_packed_geometry_type_ext_t + geometryFlags::ze_rtas_builder_packed_geometry_ext_flags_t + geometryMask::UInt8 + quadFormat::ze_rtas_builder_packed_input_data_format_ext_t + vertexFormat::ze_rtas_builder_packed_input_data_format_ext_t + quadCount::UInt32 + vertexCount::UInt32 + quadStride::UInt32 + vertexStride::UInt32 + pQuadBuffer::Ptr{Cvoid} + pVertexBuffer::Ptr{Cvoid} +end + +const ze_rtas_builder_quads_geometry_info_ext_t = _ze_rtas_builder_quads_geometry_info_ext_t + +struct _ze_rtas_geometry_aabbs_ext_cb_params_t + stype::ze_structure_type_t + pNext::Ptr{Cvoid} + primID::UInt32 + primIDCount::UInt32 + pGeomUserPtr::Ptr{Cvoid} + pBuildUserPtr::Ptr{Cvoid} + pBoundsOut::Ptr{ze_rtas_aabb_ext_t} +end + +const ze_rtas_geometry_aabbs_ext_cb_params_t = _ze_rtas_geometry_aabbs_ext_cb_params_t + +# typedef void ( * ze_rtas_geometry_aabbs_cb_ext_t ) ( ze_rtas_geometry_aabbs_ext_cb_params_t * params ///< [in] callback function parameters structure ) +const ze_rtas_geometry_aabbs_cb_ext_t = Ptr{Cvoid} + +struct _ze_rtas_builder_procedural_geometry_info_ext_t + geometryType::ze_rtas_builder_packed_geometry_type_ext_t + geometryFlags::ze_rtas_builder_packed_geometry_ext_flags_t + geometryMask::UInt8 + reserved::UInt8 + primCount::UInt32 + pfnGetBoundsCb::ze_rtas_geometry_aabbs_cb_ext_t + pGeomUserPtr::Ptr{Cvoid} +end + +const ze_rtas_builder_procedural_geometry_info_ext_t = _ze_rtas_builder_procedural_geometry_info_ext_t + +const ze_rtas_builder_packed_instance_ext_flags_t = UInt8 + +struct _ze_rtas_builder_instance_geometry_info_ext_t + geometryType::ze_rtas_builder_packed_geometry_type_ext_t + instanceFlags::ze_rtas_builder_packed_instance_ext_flags_t + geometryMask::UInt8 + transformFormat::ze_rtas_builder_packed_input_data_format_ext_t + instanceUserID::UInt32 + pTransform::Ptr{Cvoid} + pBounds::Ptr{ze_rtas_aabb_ext_t} + pAccelerationStructure::Ptr{Cvoid} +end + +const ze_rtas_builder_instance_geometry_info_ext_t = _ze_rtas_builder_instance_geometry_info_ext_t + +@cenum _ze_rtas_builder_build_quality_hint_ext_t::UInt32 begin + ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXT_LOW = 0 + ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXT_MEDIUM = 1 + ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXT_HIGH = 2 + ZE_RTAS_BUILDER_BUILD_QUALITY_HINT_EXT_FORCE_UINT32 = 2147483647 +end + +const ze_rtas_builder_build_quality_hint_ext_t = _ze_rtas_builder_build_quality_hint_ext_t + +const ze_rtas_builder_build_op_ext_flags_t = UInt32 + +struct _ze_rtas_builder_build_op_ext_desc_t + stype::ze_structure_type_t + pNext::Ptr{Cvoid} + rtasFormat::ze_rtas_format_ext_t + buildQuality::ze_rtas_builder_build_quality_hint_ext_t + buildFlags::ze_rtas_builder_build_op_ext_flags_t + ppGeometries::Ptr{Ptr{ze_rtas_builder_geometry_info_ext_t}} + numGeometries::UInt32 +end + +const ze_rtas_builder_build_op_ext_desc_t = _ze_rtas_builder_build_op_ext_desc_t + +struct _ze_device_vector_width_properties_ext_t + stype::ze_structure_type_t + pNext::Ptr{Cvoid} + vector_width_size::UInt32 + preferred_vector_width_char::UInt32 + preferred_vector_width_short::UInt32 + preferred_vector_width_int::UInt32 + preferred_vector_width_long::UInt32 + preferred_vector_width_float::UInt32 + preferred_vector_width_double::UInt32 + preferred_vector_width_half::UInt32 + native_vector_width_char::UInt32 + native_vector_width_short::UInt32 + native_vector_width_int::UInt32 + native_vector_width_long::UInt32 + native_vector_width_float::UInt32 + native_vector_width_double::UInt32 + native_vector_width_half::UInt32 +end + +const ze_device_vector_width_properties_ext_t = _ze_device_vector_width_properties_ext_t + struct _ze_cache_reservation_ext_desc_t stype::ze_structure_type_t pNext::Ptr{Cvoid} @@ -1425,6 +1798,7 @@ const ze_rtas_device_exp_flags_t = UInt32 @cenum _ze_rtas_format_exp_t::UInt32 begin ZE_RTAS_FORMAT_EXP_INVALID = 0 + ZE_RTAS_FORMAT_EXP_MAX = 2147483646 ZE_RTAS_FORMAT_EXP_FORCE_UINT32 = 2147483647 end @@ -1752,6 +2126,16 @@ end const ze_mutable_global_offset_exp_desc_t = _ze_mutable_global_offset_exp_desc_t +struct _ze_mutable_graph_argument_exp_desc_t + stype::ze_structure_type_t + pNext::Ptr{Cvoid} + commandId::UInt64 + argIndex::UInt32 + pArgValue::Ptr{Cvoid} +end + +const ze_mutable_graph_argument_exp_desc_t = _ze_mutable_graph_argument_exp_desc_t + const ze_init_flags_t = UInt32 @cenum _ze_init_flag_t::UInt32 begin @@ -1771,6 +2155,20 @@ end phDrivers::Ptr{ze_driver_handle_t})::ze_result_t end +@cenum _ze_init_driver_type_flag_t::UInt32 begin + ZE_INIT_DRIVER_TYPE_FLAG_GPU = 1 + ZE_INIT_DRIVER_TYPE_FLAG_NPU = 2 + ZE_INIT_DRIVER_TYPE_FLAG_FORCE_UINT32 = 2147483647 +end + +const ze_init_driver_type_flag_t = _ze_init_driver_type_flag_t + +@checked function zeInitDrivers(pCount, phDrivers, desc) + @ccall libze_loader.zeInitDrivers(pCount::Ptr{UInt32}, + phDrivers::Ptr{ze_driver_handle_t}, + desc::Ptr{ze_init_driver_type_desc_t})::ze_result_t +end + @cenum _ze_api_version_t::UInt32 begin ZE_API_VERSION_1_0 = 65536 ZE_API_VERSION_1_1 = 65537 @@ -1782,7 +2180,11 @@ end ZE_API_VERSION_1_7 = 65543 ZE_API_VERSION_1_8 = 65544 ZE_API_VERSION_1_9 = 65545 - ZE_API_VERSION_CURRENT = 65545 + ZE_API_VERSION_1_10 = 65546 + ZE_API_VERSION_1_11 = 65547 + ZE_API_VERSION_1_12 = 65548 + ZE_API_VERSION_1_13 = 65549 + ZE_API_VERSION_CURRENT = 65549 ZE_API_VERSION_FORCE_UINT32 = 2147483647 end @@ -2939,7 +3341,8 @@ end end @cenum _ze_physical_mem_flag_t::UInt32 begin - ZE_PHYSICAL_MEM_FLAG_TBD = 1 + ZE_PHYSICAL_MEM_FLAG_ALLOCATE_ON_DEVICE = 1 + ZE_PHYSICAL_MEM_FLAG_ALLOCATE_ON_HOST = 2 ZE_PHYSICAL_MEM_FLAG_FORCE_UINT32 = 2147483647 end @@ -3032,6 +3435,271 @@ end const ze_relaxed_allocation_limits_exp_flag_t = _ze_relaxed_allocation_limits_exp_flag_t +@cenum _ze_kernel_get_binary_exp_version_t::UInt32 begin + ZE_KERNEL_GET_BINARY_EXP_VERSION_1_0 = 65536 + ZE_KERNEL_GET_BINARY_EXP_VERSION_CURRENT = 65536 + ZE_KERNEL_GET_BINARY_EXP_VERSION_FORCE_UINT32 = 2147483647 +end + +const ze_kernel_get_binary_exp_version_t = _ze_kernel_get_binary_exp_version_t + +@checked function zeKernelGetBinaryExp(hKernel, pSize, pKernelBinary) + @ccall libze_loader.zeKernelGetBinaryExp(hKernel::ze_kernel_handle_t, + pSize::Ptr{Csize_t}, + pKernelBinary::Ptr{UInt8})::ze_result_t +end + +@cenum _ze_driver_ddi_handles_ext_version_t::UInt32 begin + ZE_DRIVER_DDI_HANDLES_EXT_VERSION_1_0 = 65536 + ZE_DRIVER_DDI_HANDLES_EXT_VERSION_CURRENT = 65536 + ZE_DRIVER_DDI_HANDLES_EXT_VERSION_FORCE_UINT32 = 2147483647 +end + +const ze_driver_ddi_handles_ext_version_t = _ze_driver_ddi_handles_ext_version_t + +@cenum _ze_driver_ddi_handle_ext_flag_t::UInt32 begin + ZE_DRIVER_DDI_HANDLE_EXT_FLAG_DDI_HANDLE_EXT_SUPPORTED = 1 + ZE_DRIVER_DDI_HANDLE_EXT_FLAG_FORCE_UINT32 = 2147483647 +end + +const ze_driver_ddi_handle_ext_flag_t = _ze_driver_ddi_handle_ext_flag_t + +@cenum _ze_external_semaphore_ext_version_t::UInt32 begin + ZE_EXTERNAL_SEMAPHORE_EXT_VERSION_1_0 = 65536 + ZE_EXTERNAL_SEMAPHORE_EXT_VERSION_CURRENT = 65536 + ZE_EXTERNAL_SEMAPHORE_EXT_VERSION_FORCE_UINT32 = 2147483647 +end + +const ze_external_semaphore_ext_version_t = _ze_external_semaphore_ext_version_t + +mutable struct _ze_external_semaphore_ext_handle_t end + +const ze_external_semaphore_ext_handle_t = Ptr{_ze_external_semaphore_ext_handle_t} + +@cenum _ze_external_semaphore_ext_flag_t::UInt32 begin + ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_OPAQUE_FD = 1 + ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_OPAQUE_WIN32 = 2 + ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_OPAQUE_WIN32_KMT = 4 + ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_D3D12_FENCE = 8 + ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_D3D11_FENCE = 16 + ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_KEYED_MUTEX = 32 + ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_KEYED_MUTEX_KMT = 64 + ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_VK_TIMELINE_SEMAPHORE_FD = 128 + ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_VK_TIMELINE_SEMAPHORE_WIN32 = 256 + ZE_EXTERNAL_SEMAPHORE_EXT_FLAG_FORCE_UINT32 = 2147483647 +end + +const ze_external_semaphore_ext_flag_t = _ze_external_semaphore_ext_flag_t + +@checked function zeDeviceImportExternalSemaphoreExt(hDevice, desc, phSemaphore) + @ccall libze_loader.zeDeviceImportExternalSemaphoreExt(hDevice::ze_device_handle_t, + desc::Ptr{ze_external_semaphore_ext_desc_t}, + phSemaphore::Ptr{ze_external_semaphore_ext_handle_t})::ze_result_t +end + +@checked function zeDeviceReleaseExternalSemaphoreExt(hSemaphore) + @ccall libze_loader.zeDeviceReleaseExternalSemaphoreExt(hSemaphore::ze_external_semaphore_ext_handle_t)::ze_result_t +end + +@checked function zeCommandListAppendSignalExternalSemaphoreExt(hCommandList, numSemaphores, + phSemaphores, signalParams, + hSignalEvent, numWaitEvents, + phWaitEvents) + @ccall libze_loader.zeCommandListAppendSignalExternalSemaphoreExt(hCommandList::ze_command_list_handle_t, + numSemaphores::UInt32, + phSemaphores::Ptr{ze_external_semaphore_ext_handle_t}, + signalParams::Ptr{ze_external_semaphore_signal_params_ext_t}, + hSignalEvent::ze_event_handle_t, + numWaitEvents::UInt32, + phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t +end + +@checked function zeCommandListAppendWaitExternalSemaphoreExt(hCommandList, numSemaphores, + phSemaphores, waitParams, + hSignalEvent, numWaitEvents, + phWaitEvents) + @ccall libze_loader.zeCommandListAppendWaitExternalSemaphoreExt(hCommandList::ze_command_list_handle_t, + numSemaphores::UInt32, + phSemaphores::Ptr{ze_external_semaphore_ext_handle_t}, + waitParams::Ptr{ze_external_semaphore_wait_params_ext_t}, + hSignalEvent::ze_event_handle_t, + numWaitEvents::UInt32, + phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t +end + +@cenum _ze_device_cache_line_size_ext_version_t::UInt32 begin + ZE_DEVICE_CACHE_LINE_SIZE_EXT_VERSION_1_0 = 65536 + ZE_DEVICE_CACHE_LINE_SIZE_EXT_VERSION_CURRENT = 65536 + ZE_DEVICE_CACHE_LINE_SIZE_EXT_VERSION_FORCE_UINT32 = 2147483647 +end + +const ze_device_cache_line_size_ext_version_t = _ze_device_cache_line_size_ext_version_t + +@cenum _ze_rtas_device_ext_flag_t::UInt32 begin + ZE_RTAS_DEVICE_EXT_FLAG_RESERVED = 1 + ZE_RTAS_DEVICE_EXT_FLAG_FORCE_UINT32 = 2147483647 +end + +const ze_rtas_device_ext_flag_t = _ze_rtas_device_ext_flag_t + +@cenum _ze_rtas_builder_ext_flag_t::UInt32 begin + ZE_RTAS_BUILDER_EXT_FLAG_RESERVED = 1 + ZE_RTAS_BUILDER_EXT_FLAG_FORCE_UINT32 = 2147483647 +end + +const ze_rtas_builder_ext_flag_t = _ze_rtas_builder_ext_flag_t + +@cenum _ze_rtas_parallel_operation_ext_flag_t::UInt32 begin + ZE_RTAS_PARALLEL_OPERATION_EXT_FLAG_RESERVED = 1 + ZE_RTAS_PARALLEL_OPERATION_EXT_FLAG_FORCE_UINT32 = 2147483647 +end + +const ze_rtas_parallel_operation_ext_flag_t = _ze_rtas_parallel_operation_ext_flag_t + +const ze_rtas_builder_geometry_ext_flags_t = UInt32 + +@cenum _ze_rtas_builder_geometry_ext_flag_t::UInt32 begin + ZE_RTAS_BUILDER_GEOMETRY_EXT_FLAG_NON_OPAQUE = 1 + ZE_RTAS_BUILDER_GEOMETRY_EXT_FLAG_FORCE_UINT32 = 2147483647 +end + +const ze_rtas_builder_geometry_ext_flag_t = _ze_rtas_builder_geometry_ext_flag_t + +const ze_rtas_builder_instance_ext_flags_t = UInt32 + +@cenum _ze_rtas_builder_instance_ext_flag_t::UInt32 begin + ZE_RTAS_BUILDER_INSTANCE_EXT_FLAG_TRIANGLE_CULL_DISABLE = 1 + ZE_RTAS_BUILDER_INSTANCE_EXT_FLAG_TRIANGLE_FRONT_COUNTERCLOCKWISE = 2 + ZE_RTAS_BUILDER_INSTANCE_EXT_FLAG_TRIANGLE_FORCE_OPAQUE = 4 + ZE_RTAS_BUILDER_INSTANCE_EXT_FLAG_TRIANGLE_FORCE_NON_OPAQUE = 8 + ZE_RTAS_BUILDER_INSTANCE_EXT_FLAG_FORCE_UINT32 = 2147483647 +end + +const ze_rtas_builder_instance_ext_flag_t = _ze_rtas_builder_instance_ext_flag_t + +@cenum _ze_rtas_builder_build_op_ext_flag_t::UInt32 begin + ZE_RTAS_BUILDER_BUILD_OP_EXT_FLAG_COMPACT = 1 + ZE_RTAS_BUILDER_BUILD_OP_EXT_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION = 2 + ZE_RTAS_BUILDER_BUILD_OP_EXT_FLAG_FORCE_UINT32 = 2147483647 +end + +const ze_rtas_builder_build_op_ext_flag_t = _ze_rtas_builder_build_op_ext_flag_t + +@cenum _ze_rtas_builder_geometry_type_ext_t::UInt32 begin + ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXT_TRIANGLES = 0 + ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXT_QUADS = 1 + ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXT_PROCEDURAL = 2 + ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXT_INSTANCE = 3 + ZE_RTAS_BUILDER_GEOMETRY_TYPE_EXT_FORCE_UINT32 = 2147483647 +end + +const ze_rtas_builder_geometry_type_ext_t = _ze_rtas_builder_geometry_type_ext_t + +@cenum _ze_rtas_builder_input_data_format_ext_t::UInt32 begin + ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_FLOAT3 = 0 + ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_FLOAT3X4_COLUMN_MAJOR = 1 + ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_FLOAT3X4_ALIGNED_COLUMN_MAJOR = 2 + ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_FLOAT3X4_ROW_MAJOR = 3 + ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_AABB = 4 + ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_TRIANGLE_INDICES_UINT32 = 5 + ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_QUAD_INDICES_UINT32 = 6 + ZE_RTAS_BUILDER_INPUT_DATA_FORMAT_EXT_FORCE_UINT32 = 2147483647 +end + +const ze_rtas_builder_input_data_format_ext_t = _ze_rtas_builder_input_data_format_ext_t + +mutable struct _ze_rtas_builder_ext_handle_t end + +const ze_rtas_builder_ext_handle_t = Ptr{_ze_rtas_builder_ext_handle_t} + +mutable struct _ze_rtas_parallel_operation_ext_handle_t end + +const ze_rtas_parallel_operation_ext_handle_t = Ptr{_ze_rtas_parallel_operation_ext_handle_t} + +@checked function zeRTASBuilderCreateExt(hDriver, pDescriptor, phBuilder) + @ccall libze_loader.zeRTASBuilderCreateExt(hDriver::ze_driver_handle_t, + pDescriptor::Ptr{ze_rtas_builder_ext_desc_t}, + phBuilder::Ptr{ze_rtas_builder_ext_handle_t})::ze_result_t +end + +@checked function zeRTASBuilderGetBuildPropertiesExt(hBuilder, pBuildOpDescriptor, + pProperties) + @ccall libze_loader.zeRTASBuilderGetBuildPropertiesExt(hBuilder::ze_rtas_builder_ext_handle_t, + pBuildOpDescriptor::Ptr{ze_rtas_builder_build_op_ext_desc_t}, + pProperties::Ptr{ze_rtas_builder_ext_properties_t})::ze_result_t +end + +@checked function zeDriverRTASFormatCompatibilityCheckExt(hDriver, rtasFormatA, rtasFormatB) + @ccall libze_loader.zeDriverRTASFormatCompatibilityCheckExt(hDriver::ze_driver_handle_t, + rtasFormatA::ze_rtas_format_ext_t, + rtasFormatB::ze_rtas_format_ext_t)::ze_result_t +end + +@checked function zeRTASBuilderBuildExt(hBuilder, pBuildOpDescriptor, pScratchBuffer, + scratchBufferSizeBytes, pRtasBuffer, + rtasBufferSizeBytes, hParallelOperation, + pBuildUserPtr, pBounds, pRtasBufferSizeBytes) + @ccall libze_loader.zeRTASBuilderBuildExt(hBuilder::ze_rtas_builder_ext_handle_t, + pBuildOpDescriptor::Ptr{ze_rtas_builder_build_op_ext_desc_t}, + pScratchBuffer::Ptr{Cvoid}, + scratchBufferSizeBytes::Csize_t, + pRtasBuffer::Ptr{Cvoid}, + rtasBufferSizeBytes::Csize_t, + hParallelOperation::ze_rtas_parallel_operation_ext_handle_t, + pBuildUserPtr::Ptr{Cvoid}, + pBounds::Ptr{ze_rtas_aabb_ext_t}, + pRtasBufferSizeBytes::Ptr{Csize_t})::ze_result_t +end + +@checked function zeRTASBuilderCommandListAppendCopyExt(hCommandList, dstptr, srcptr, size, + hSignalEvent, numWaitEvents, + phWaitEvents) + @ccall libze_loader.zeRTASBuilderCommandListAppendCopyExt(hCommandList::ze_command_list_handle_t, + dstptr::Ptr{Cvoid}, + srcptr::Ptr{Cvoid}, + size::Csize_t, + hSignalEvent::ze_event_handle_t, + numWaitEvents::UInt32, + phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t +end + +@checked function zeRTASBuilderDestroyExt(hBuilder) + @ccall libze_loader.zeRTASBuilderDestroyExt(hBuilder::ze_rtas_builder_ext_handle_t)::ze_result_t +end + +@checked function zeRTASParallelOperationCreateExt(hDriver, phParallelOperation) + @ccall libze_loader.zeRTASParallelOperationCreateExt(hDriver::ze_driver_handle_t, + phParallelOperation::Ptr{ze_rtas_parallel_operation_ext_handle_t})::ze_result_t +end + +@checked function zeRTASParallelOperationGetPropertiesExt(hParallelOperation, pProperties) + @ccall libze_loader.zeRTASParallelOperationGetPropertiesExt(hParallelOperation::ze_rtas_parallel_operation_ext_handle_t, + pProperties::Ptr{ze_rtas_parallel_operation_ext_properties_t})::ze_result_t +end + +@checked function zeRTASParallelOperationJoinExt(hParallelOperation) + @ccall libze_loader.zeRTASParallelOperationJoinExt(hParallelOperation::ze_rtas_parallel_operation_ext_handle_t)::ze_result_t +end + +@checked function zeRTASParallelOperationDestroyExt(hParallelOperation) + @ccall libze_loader.zeRTASParallelOperationDestroyExt(hParallelOperation::ze_rtas_parallel_operation_ext_handle_t)::ze_result_t +end + +@cenum _ze_device_vector_sizes_ext_version_t::UInt32 begin + ZE_DEVICE_VECTOR_SIZES_EXT_VERSION_1_0 = 65536 + ZE_DEVICE_VECTOR_SIZES_EXT_VERSION_CURRENT = 65536 + ZE_DEVICE_VECTOR_SIZES_EXT_VERSION_FORCE_UINT32 = 2147483647 +end + +const ze_device_vector_sizes_ext_version_t = _ze_device_vector_sizes_ext_version_t + +@checked function zeDeviceGetVectorWidthPropertiesExt(hDevice, pCount, + pVectorWidthProperties) + @ccall libze_loader.zeDeviceGetVectorWidthPropertiesExt(hDevice::ze_device_handle_t, + pCount::Ptr{UInt32}, + pVectorWidthProperties::Ptr{ze_device_vector_width_properties_ext_t})::ze_result_t +end + @cenum _ze_cache_reservation_ext_version_t::UInt32 begin ZE_CACHE_RESERVATION_EXT_VERSION_1_0 = 65536 ZE_CACHE_RESERVATION_EXT_VERSION_CURRENT = 65536 @@ -3619,6 +4287,7 @@ const ze_bindless_image_exp_version_t = _ze_bindless_image_exp_version_t @cenum _ze_image_bindless_exp_flag_t::UInt32 begin ZE_IMAGE_BINDLESS_EXP_FLAG_BINDLESS = 1 + ZE_IMAGE_BINDLESS_EXP_FLAG_SAMPLED_IMAGE = 2 ZE_IMAGE_BINDLESS_EXP_FLAG_FORCE_UINT32 = 2147483647 end @@ -3673,7 +4342,8 @@ end @cenum _ze_mutable_command_list_exp_version_t::UInt32 begin ZE_MUTABLE_COMMAND_LIST_EXP_VERSION_1_0 = 65536 - ZE_MUTABLE_COMMAND_LIST_EXP_VERSION_CURRENT = 65536 + ZE_MUTABLE_COMMAND_LIST_EXP_VERSION_1_1 = 65537 + ZE_MUTABLE_COMMAND_LIST_EXP_VERSION_CURRENT = 65537 ZE_MUTABLE_COMMAND_LIST_EXP_VERSION_FORCE_UINT32 = 2147483647 end @@ -3686,6 +4356,8 @@ const ze_mutable_command_list_exp_version_t = _ze_mutable_command_list_exp_versi ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET = 8 ZE_MUTABLE_COMMAND_EXP_FLAG_SIGNAL_EVENT = 16 ZE_MUTABLE_COMMAND_EXP_FLAG_WAIT_EVENTS = 32 + ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_INSTRUCTION = 64 + ZE_MUTABLE_COMMAND_EXP_FLAG_GRAPH_ARGUMENTS = 128 ZE_MUTABLE_COMMAND_EXP_FLAG_FORCE_UINT32 = 2147483647 end @@ -3704,6 +4376,16 @@ const ze_mutable_command_list_exp_flag_t = _ze_mutable_command_list_exp_flag_t pCommandId::Ptr{UInt64})::ze_result_t end +@checked function zeCommandListGetNextCommandIdWithKernelsExp(hCommandList, desc, + numKernels, phKernels, + pCommandId) + @ccall libze_loader.zeCommandListGetNextCommandIdWithKernelsExp(hCommandList::ze_command_list_handle_t, + desc::Ptr{ze_mutable_command_id_exp_desc_t}, + numKernels::UInt32, + phKernels::Ptr{ze_kernel_handle_t}, + pCommandId::Ptr{UInt64})::ze_result_t +end + @checked function zeCommandListUpdateMutableCommandsExp(hCommandList, desc) @ccall libze_loader.zeCommandListUpdateMutableCommandsExp(hCommandList::ze_command_list_handle_t, desc::Ptr{ze_mutable_commands_exp_desc_t})::ze_result_t @@ -3725,6 +4407,14 @@ end phWaitEvents::Ptr{ze_event_handle_t})::ze_result_t end +@checked function zeCommandListUpdateMutableCommandKernelsExp(hCommandList, numKernels, + pCommandId, phKernels) + @ccall libze_loader.zeCommandListUpdateMutableCommandKernelsExp(hCommandList::ze_command_list_handle_t, + numKernels::UInt32, + pCommandId::Ptr{UInt64}, + phKernels::Ptr{ze_kernel_handle_t})::ze_result_t +end + struct _ze_init_params_t pflags::Ptr{ze_init_flags_t} end @@ -5317,6 +6007,8 @@ const ZE_MAX_IPC_HANDLE_SIZE = 64 const ZE_MAX_UUID_SIZE = 16 +const ZE_API_VERSION_CURRENT_M = ZE_MAKE_VERSION(1, 13) + const ZE_MAX_DRIVER_UUID_SIZE = 16 const ZE_MAX_EXTENSION_NAME = 256 @@ -5343,6 +6035,18 @@ const ZE_GLOBAL_OFFSET_EXP_NAME = "ZE_experimental_global_offset" const ZE_RELAXED_ALLOCATION_LIMITS_EXP_NAME = "ZE_experimental_relaxed_allocation_limits" +const ZE_GET_KERNEL_BINARY_EXP_NAME = "ZE_extension_kernel_binary_exp" + +const ZE_DRIVER_DDI_HANDLES_EXT_NAME = "ZE_extension_driver_ddi_handles" + +const ZE_EXTERNAL_SEMAPHORES_EXTENSION_NAME = "ZE_extension_external_semaphores" + +const ZE_CACHELINE_SIZE_EXT_NAME = "ZE_extension_device_cache_line_size" + +const ZE_RTAS_EXT_NAME = "ZE_extension_rtas" + +const ZE_DEVICE_VECTOR_SIZES_EXT_NAME = "ZE_extension_device_vector_sizes" + const ZE_CACHE_RESERVATION_EXT_NAME = "ZE_extension_cache_reservation" const ZE_EVENT_QUERY_TIMESTAMPS_EXP_NAME = "ZE_experimental_event_query_timestamps" From 15971c4f1e509f50cfd351c57eea3ff71d3dc541 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Wed, 6 Aug 2025 10:33:40 -0500 Subject: [PATCH 5/5] Update software stack to NEO v25.27.34303 --- Project.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index d8ec6b0a..a970ae2e 100644 --- a/Project.toml +++ b/Project.toml @@ -37,14 +37,14 @@ GPUCompiler = "1.5" GPUToolbox = "0.1, 0.2, 0.3" KernelAbstractions = "0.9.1" LLVM = "6, 7, 8, 9" -NEO_jll = "=24.26.30049" +NEO_jll = "=25.27.34303" Preferences = "1" SPIRVIntrinsics = "0.2" SPIRV_LLVM_Translator_jll = "20" SpecialFunctions = "1.3, 2" StaticArrays = "1" julia = "1.10" -oneAPI_Level_Zero_Loader_jll = "1.9" +oneAPI_Level_Zero_Loader_jll = "1.22" oneAPI_Support_jll = "0.8" [extras]