diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake
index aa7bd830e4bc7..1baa2d43675f7 100644
--- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake
+++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake
@@ -117,13 +117,13 @@ if(SYCL_UR_USE_FETCH_CONTENT)
   endfunction()
 
   set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
-  # commit 22962057df1b9d538e08088a7b75d9d8e7c29f90 (HEAD, origin/main, origin/HEAD)
-  # Merge: e824ddc2 f0a1c433
+  # commit 532a4ecb72da4876cef61a4ae4d638e27ad609d5
+  # Merge: 22962057 d944ff33
   # Author: aarongreig
-  # Date:   Fri Sep 27 16:54:04 2024 +0100
-  #     Merge pull request #2017 from nrspruit/new_sysman_init
-  #     [L0] Use zesInit for SysMan API usage
-  set(UNIFIED_RUNTIME_TAG 22962057df1b9d538e08088a7b75d9d8e7c29f90)
+  # Date:   Mon Sep 30 10:43:10 2024 +0100
+  #     Merge pull request #1924 from Bensuo/fabio/cmd_buffer_kernel_update
+  #     Add support for command-buffer kernel updates
+  set(UNIFIED_RUNTIME_TAG 532a4ecb72da4876cef61a4ae4d638e27ad609d5)
 
   set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
   # Due to the use of dependentloadflag and no installer for UMF and hwloc we need
diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp
index 084cb5a6ec2a7..b184c30cad6fd 100644
--- a/sycl/source/detail/device_impl.cpp
+++ b/sycl/source/detail/device_impl.cpp
@@ -704,17 +704,26 @@ bool device_impl::has(aspect Aspect) const {
     return CallSuccessful && Result != nullptr;
   }
   case aspect::ext_oneapi_graph: {
-    bool SupportsCommandBufferUpdate = false;
+    ur_device_command_buffer_update_capability_flags_t UpdateCapabilities;
     bool CallSuccessful =
         getAdapter()->call_nocheck<UrApiKind::urDeviceGetInfo>(
-            MDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP,
-            sizeof(SupportsCommandBufferUpdate), &SupportsCommandBufferUpdate,
+            MDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP,
+            sizeof(UpdateCapabilities), &UpdateCapabilities,
             nullptr) == UR_RESULT_SUCCESS;
     if (!CallSuccessful) {
       return false;
     }
 
-    return has(aspect::ext_oneapi_limited_graph) && SupportsCommandBufferUpdate;
+    /* The kernel handle update capability is not yet required for the
+     * ext_oneapi_graph aspect */
+    ur_device_command_buffer_update_capability_flags_t RequiredCapabilities =
+        UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS |
+        UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE |
+        UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE |
+        UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET;
+
+    return has(aspect::ext_oneapi_limited_graph) &&
+           (UpdateCapabilities & RequiredCapabilities) == RequiredCapabilities;
   }
   case aspect::ext_oneapi_limited_graph: {
     bool SupportsCommandBuffers = false;
diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp
index acdf6baf50475..3fbda06f69b62 100644
--- a/sycl/source/detail/graph_impl.cpp
+++ b/sycl/source/detail/graph_impl.cpp
@@ -1474,6 +1474,7 @@ void exec_graph_impl::updateImpl(std::shared_ptr<node_impl> Node) {
     }
   }
 
+  UpdateDesc.hNewKernel = UrKernel;
   UpdateDesc.numNewMemObjArgs = MemobjDescs.size();
   UpdateDesc.pNewMemObjArgList = MemobjDescs.data();
   UpdateDesc.numNewPointerArgs = PtrDescs.size();
diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp
index acc61ab39d77d..41570dd11a3c1 100644
--- a/sycl/source/detail/scheduler/commands.cpp
+++ b/sycl/source/detail/scheduler/commands.cpp
@@ -2538,7 +2538,7 @@ ur_result_t enqueueImpCommandBufferKernel(
   ur_result_t Res =
       Adapter->call_nocheck<UrApiKind::urCommandBufferAppendKernelLaunchExp>(
           CommandBuffer, UrKernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0],
-          &NDRDesc.GlobalSize[0], LocalSize, SyncPoints.size(),
+          &NDRDesc.GlobalSize[0], LocalSize, 0, nullptr, SyncPoints.size(),
           SyncPoints.size() ? SyncPoints.data() : nullptr, OutSyncPoint,
           OutCommand);
 
diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp
index 71c9962155b2a..8cf9b69ebc550 100644
--- a/sycl/unittests/helpers/UrMock.hpp
+++ b/sycl/unittests/helpers/UrMock.hpp
@@ -200,14 +200,27 @@ inline ur_result_t mock_urDeviceGetInfo(void *pParams) {
   case UR_DEVICE_INFO_AVAILABLE:
   case UR_DEVICE_INFO_LINKER_AVAILABLE:
   case UR_DEVICE_INFO_COMPILER_AVAILABLE:
-  case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
-  case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: {
+  case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: {
     if (*params->ppPropValue)
       *static_cast<ur_bool_t *>(*params->ppPropValue) = true;
     if (*params->ppPropSizeRet)
       **params->ppPropSizeRet = sizeof(true);
     return UR_RESULT_SUCCESS;
   }
+  case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: {
+    if (*params->ppPropValue)
+      *static_cast<ur_device_command_buffer_update_capability_flags_t *>(
+          *params->ppPropValue) =
+          UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS |
+          UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE |
+          UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE |
+          UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET |
+          UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE;
+    if (*params->ppPropSizeRet)
+      **params->ppPropSizeRet =
+          sizeof(ur_device_command_buffer_update_capability_flags_t);
+    return UR_RESULT_SUCCESS;
+  }
   // This mock GPU device has no sub-devices
   case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: {
     if (*params->ppPropSizeRet) {