diff --git a/.github/intel-llvm-mirror-base-commit b/.github/intel-llvm-mirror-base-commit index 78b97bc53d..ccdde2d8ec 100644 --- a/.github/intel-llvm-mirror-base-commit +++ b/.github/intel-llvm-mirror-base-commit @@ -1 +1 @@ -8959a5e5a6cebac8993c58c5597638b4510be91f +84518c193adb9d8b03ae449345d892c6c9984846 diff --git a/include/ur_api.h b/include/ur_api.h index 1bba8a950e..23b33c0f2e 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -8616,18 +8616,20 @@ typedef enum ur_map_flag_t { #define UR_MAP_FLAGS_MASK 0xfffffff8 /////////////////////////////////////////////////////////////////////////////// -/// @brief Map flags +/// @brief USM migration flags, indicating the direction data is migrated in typedef uint32_t ur_usm_migration_flags_t; typedef enum ur_usm_migration_flag_t { - /// Default migration TODO: Add more enums! - UR_USM_MIGRATION_FLAG_DEFAULT = UR_BIT(0), + /// Migrate data from host to device + UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE = UR_BIT(0), + /// Migrate data from device to host + UR_USM_MIGRATION_FLAG_DEVICE_TO_HOST = UR_BIT(1), /// @cond UR_USM_MIGRATION_FLAG_FORCE_UINT32 = 0x7fffffff /// @endcond } ur_usm_migration_flag_t; /// @brief Bit Mask for validating ur_usm_migration_flags_t -#define UR_USM_MIGRATION_FLAGS_MASK 0xfffffffe +#define UR_USM_MIGRATION_FLAGS_MASK 0xfffffffc /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to map a region of the buffer object into the host @@ -11893,7 +11895,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( const void *pMemory, /// [in] size in bytes to be fetched. size_t size, - /// [in] USM prefetch flags + /// [in] USM migration flags ur_usm_migration_flags_t flags, /// [in] The number of sync points in the provided dependency list. 
uint32_t numSyncPointsInWaitList, diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 91c9973a3a..17a8a5267e 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -11031,8 +11031,11 @@ inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { inline std::ostream &operator<<(std::ostream &os, enum ur_usm_migration_flag_t value) { switch (value) { - case UR_USM_MIGRATION_FLAG_DEFAULT: - os << "UR_USM_MIGRATION_FLAG_DEFAULT"; + case UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE: + os << "UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE"; + break; + case UR_USM_MIGRATION_FLAG_DEVICE_TO_HOST: + os << "UR_USM_MIGRATION_FLAG_DEVICE_TO_HOST"; break; default: os << "unknown enumerator"; @@ -11050,15 +11053,26 @@ inline ur_result_t printFlag(std::ostream &os, uint32_t val = flag; bool first = true; - if ((val & UR_USM_MIGRATION_FLAG_DEFAULT) == - (uint32_t)UR_USM_MIGRATION_FLAG_DEFAULT) { - val ^= (uint32_t)UR_USM_MIGRATION_FLAG_DEFAULT; + if ((val & UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE) == + (uint32_t)UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE) { + val ^= (uint32_t)UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE; + } + + if ((val & UR_USM_MIGRATION_FLAG_DEVICE_TO_HOST) == + (uint32_t)UR_USM_MIGRATION_FLAG_DEVICE_TO_HOST) { + val ^= (uint32_t)UR_USM_MIGRATION_FLAG_DEVICE_TO_HOST; if (!first) { os << " | "; } else { first = false; } - os << UR_USM_MIGRATION_FLAG_DEFAULT; + os << UR_USM_MIGRATION_FLAG_DEVICE_TO_HOST; } if (val != 0) { std::bitset<32> bits(val); diff --git a/scripts/core/enqueue.yml b/scripts/core/enqueue.yml index 20d7d7bc2a..a6148bd366 100644 --- a/scripts/core/enqueue.yml +++ b/scripts/core/enqueue.yml @@ -915,13 +915,16 @@ etors: value: "$X_BIT(2)" --- #-------------------------------------------------------------------------- type: enum -desc: "Map flags" -class: $xDevice +desc: "USM migration flags, indicating the direction data is migrated in" +class: 
$xEnqueue name: $x_usm_migration_flags_t etors: - - name: DEFAULT - desc: "Default migration TODO: Add more enums! " + - name: HOST_TO_DEVICE + desc: "Migrate data from host to device" value: "$X_BIT(0)" + - name: DEVICE_TO_HOST + desc: "Migrate data from device to host" + value: "$X_BIT(1)" --- #-------------------------------------------------------------------------- type: function desc: "Enqueue a command to map a region of the buffer object into the host address space and return a pointer to the mapped region" diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index e8f2caa15d..a194777f9e 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -1025,7 +1025,7 @@ params: desc: "[in] size in bytes to be fetched." - type: $x_usm_migration_flags_t name: flags - desc: "[in] USM prefetch flags" + desc: "[in] USM migration flags" - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." 
diff --git a/source/adapters/cuda/enqueue.cpp b/source/adapters/cuda/enqueue.cpp index 6f4a5bce3c..36f7535be7 100644 --- a/source/adapters/cuda/enqueue.cpp +++ b/source/adapters/cuda/enqueue.cpp @@ -1516,14 +1516,40 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_migration_flags_t /*flags*/, uint32_t numEventsInWaitList, + ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + ur_device_handle_t Device = hQueue->getDevice(); +#if CUDA_VERSION >= 13000 + CUmemLocation Location; + switch (flags) { + case UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE: + Location.type = CU_MEM_LOCATION_TYPE_DEVICE; + Location.id = Device->get(); + break; + case UR_USM_MIGRATION_FLAG_DEVICE_TO_HOST: + Location.type = CU_MEM_LOCATION_TYPE_HOST; + break; +#else + int dstDevice; + switch (flags) { + case UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE: + dstDevice = Device->get(); + break; + case UR_USM_MIGRATION_FLAG_DEVICE_TO_HOST: + dstDevice = CU_DEVICE_CPU; + break; +#endif + default: + setErrorMessage("Invalid USM migration flag", + UR_RESULT_ERROR_INVALID_ENUMERATION); + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + size_t PointerRangeSize = 0; UR_CHECK_ERROR(cuPointerGetAttribute( &PointerRangeSize, CU_POINTER_ATTRIBUTE_RANGE_SIZE, (CUdeviceptr)pMem)); UR_ASSERT(size <= PointerRangeSize, UR_RESULT_ERROR_INVALID_SIZE); - ur_device_handle_t Device = hQueue->getDevice(); std::unique_ptr EventPtr{nullptr}; try { @@ -1564,15 +1590,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( } #if CUDA_VERSION >= 13000 - CUmemLocation Location; - Location.id = Device->get(); - Location.type = CU_MEM_LOCATION_TYPE_DEVICE; unsigned int Flags = 0U; UR_CHECK_ERROR( cuMemPrefetchAsync((CUdeviceptr)pMem, size, Location, Flags, CuStream)); #else UR_CHECK_ERROR( - 
cuMemPrefetchAsync((CUdeviceptr)pMem, size, Device->get(), CuStream)); + cuMemPrefetchAsync((CUdeviceptr)pMem, size, dstDevice, CuStream)); #endif } catch (ur_result_t Err) { return Err; diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index dc0fac8050..2206fbbf3e 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -1324,11 +1324,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_migration_flags_t /*flags*/, uint32_t numEventsInWaitList, + ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - void *HIPDevicePtr = const_cast(pMem); ur_device_handle_t Device = hQueue->getDevice(); + hipDevice_t TargetDevice; + switch (flags) { + case UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE: + TargetDevice = Device->get(); + break; + case UR_USM_MIGRATION_FLAG_DEVICE_TO_HOST: + TargetDevice = hipCpuDeviceId; + break; + default: + setErrorMessage("Invalid USM migration flag", + UR_RESULT_ERROR_INVALID_ENUMERATION); + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + void *HIPDevicePtr = const_cast(pMem); // HIP_POINTER_ATTRIBUTE_RANGE_SIZE is not an attribute in ROCM < 5, // so we can't perform this check for such cases. 
@@ -1385,8 +1398,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( return UR_RESULT_SUCCESS; } - UR_CHECK_ERROR( - hipMemPrefetchAsync(pMem, size, hQueue->getDevice()->get(), HIPStream)); + UR_CHECK_ERROR(hipMemPrefetchAsync(pMem, size, TargetDevice, HIPStream)); releaseEvent(); } catch (ur_result_t Err) { return Err; diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index 362a2479cd..9de4138f7e 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -506,72 +506,71 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() bool forceLoadedAdapter = ur_getenv("UR_ADAPTERS_FORCE_LOAD").has_value(); if (!forceLoadedAdapter) { #ifdef UR_ADAPTER_LEVEL_ZERO_V2 - auto [useV2, reason] = shouldUseV2Adapter(); - if (!useV2) { - UR_LOG(INFO, "Skipping L0 V2 adapter: {}", reason); - return; - } + auto [useV2, reason] = shouldUseV2Adapter(); + if (!useV2) { + UR_LOG(INFO, "Skipping L0 V2 adapter: {}", reason); + return; + } #else - auto [useV1, reason] = shouldUseV1Adapter(); - if (!useV1) { - UR_LOG(INFO, "Skipping L0 V1 adapter: {}", reason); - return; - } + auto [useV1, reason] = shouldUseV1Adapter(); + if (!useV1) { + UR_LOG(INFO, "Skipping L0 V1 adapter: {}", reason); + return; + } #endif } - // Check if the user has enabled the default L0 SysMan initialization. - const int UrSysmanZesinitEnable = [&UserForcedSysManInit] { - const char *UrRet = std::getenv("UR_L0_ENABLE_ZESINIT_DEFAULT"); - if (!UrRet) - return 0; - UserForcedSysManInit &= 2; - return std::atoi(UrRet); - }(); - - bool ZesInitNeeded = UrSysmanZesinitEnable && !UrSysManEnvInitEnabled; - // Unless the user has forced the SysMan init, we will check the device - // version to see if the zesInit is needed. 
- if (UserForcedSysManInit == 0 && checkDeviceIntelGPUIpVersionOrNewer( - 0x05004000) == UR_RESULT_SUCCESS) { - if (UrSysManEnvInitEnabled) { - setEnvVar("ZES_ENABLE_SYSMAN", "0"); - } - ZesInitNeeded = true; - } - if (ZesInitNeeded) { + // Check if the user has enabled the default L0 SysMan initialization. + const int UrSysmanZesinitEnable = [&UserForcedSysManInit] { + const char *UrRet = std::getenv("UR_L0_ENABLE_ZESINIT_DEFAULT"); + if (!UrRet) + return 0; + UserForcedSysManInit &= 2; + return std::atoi(UrRet); + }(); + + bool ZesInitNeeded = UrSysmanZesinitEnable && !UrSysManEnvInitEnabled; + // Unless the user has forced the SysMan init, we will check the device + // version to see if the zesInit is needed. + if (UserForcedSysManInit == 0 && + checkDeviceIntelGPUIpVersionOrNewer(0x05004000) == UR_RESULT_SUCCESS) { + if (UrSysManEnvInitEnabled) { + setEnvVar("ZES_ENABLE_SYSMAN", "0"); + } + ZesInitNeeded = true; + } + if (ZesInitNeeded) { #ifdef UR_STATIC_LEVEL_ZERO - getDeviceByUUIdFunctionPtr = zesDriverGetDeviceByUuidExp; - getSysManDriversFunctionPtr = zesDriverGet; - sysManInitFunctionPtr = zesInit; + getDeviceByUUIdFunctionPtr = zesDriverGetDeviceByUuidExp; + getSysManDriversFunctionPtr = zesDriverGet; + sysManInitFunctionPtr = zesInit; #else - getDeviceByUUIdFunctionPtr = (zes_pfnDriverGetDeviceByUuidExp_t) - ur_loader::LibLoader::getFunctionPtr(processHandle, - "zesDriverGetDeviceByUuidExp"); - getSysManDriversFunctionPtr = - (zes_pfnDriverGet_t)ur_loader::LibLoader::getFunctionPtr( - processHandle, "zesDriverGet"); - sysManInitFunctionPtr = - (zes_pfnInit_t)ur_loader::LibLoader::getFunctionPtr(processHandle, - "zesInit"); + getDeviceByUUIdFunctionPtr = + (zes_pfnDriverGetDeviceByUuidExp_t)ur_loader::LibLoader::getFunctionPtr( + processHandle, "zesDriverGetDeviceByUuidExp"); + getSysManDriversFunctionPtr = + (zes_pfnDriverGet_t)ur_loader::LibLoader::getFunctionPtr( + processHandle, "zesDriverGet"); + sysManInitFunctionPtr = 
(zes_pfnInit_t)ur_loader::LibLoader::getFunctionPtr( + processHandle, "zesInit"); #endif - } - if (getDeviceByUUIdFunctionPtr && getSysManDriversFunctionPtr && - sysManInitFunctionPtr) { - ze_init_flags_t L0ZesInitFlags = 0; - UR_LOG(DEBUG, "\nzesInit with flags value of {}\n", - static_cast(L0ZesInitFlags)); - ZesResult = ZE_CALL_NOCHECK(sysManInitFunctionPtr, (L0ZesInitFlags)); - } else { - ZesResult = ZE_RESULT_ERROR_UNINITIALIZED; - } + } + if (getDeviceByUUIdFunctionPtr && getSysManDriversFunctionPtr && + sysManInitFunctionPtr) { + ze_init_flags_t L0ZesInitFlags = 0; + UR_LOG(DEBUG, "\nzesInit with flags value of {}\n", + static_cast(L0ZesInitFlags)); + ZesResult = ZE_CALL_NOCHECK(sysManInitFunctionPtr, (L0ZesInitFlags)); + } else { + ZesResult = ZE_RESULT_ERROR_UNINITIALIZED; + } - ur_result_t err = initPlatforms(this, platforms, ZesResult); - if (err == UR_RESULT_SUCCESS) { - Platforms = std::move(platforms); - } else { - throw err; - } + ur_result_t err = initPlatforms(this, platforms, ZesResult); + if (err == UR_RESULT_SUCCESS) { + Platforms = std::move(platforms); + } else { + throw err; + } } void globalAdapterOnDemandCleanup() { diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 1e68069db5..687c905417 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -1313,7 +1313,7 @@ ur_result_t urCommandBufferAppendMemBufferReadRectExp( ur_result_t urCommandBufferAppendUSMPrefetchExp( ur_exp_command_buffer_handle_t CommandBuffer, const void *Mem, size_t Size, - ur_usm_migration_flags_t /*Flags*/, uint32_t NumSyncPointsInWaitList, + ur_usm_migration_flags_t Flags, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, uint32_t /*NumEventsInWaitList*/, const ur_event_handle_t * /*EventWaitList*/, @@ -1327,6 +1327,17 @@ ur_result_t urCommandBufferAppendUSMPrefetchExp( UR_COMMAND_USM_PREFETCH, CommandBuffer, 
CommandBuffer->ZeComputeCommandList, NumSyncPointsInWaitList, SyncPointWaitList, true, RetSyncPoint, ZeEventList, ZeLaunchEvent)); + switch (Flags) { + case UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE: + break; + case UR_USM_MIGRATION_FLAG_DEVICE_TO_HOST: + UR_LOG(WARN, "commandBufferAppendUSMPrefetch: L0 does not support prefetch " + "to host yet"); + break; + default: + UR_LOG(ERR, "commandBufferAppendUSMPrefetch: invalid USM migration flag"); + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } if (!ZeEventList.empty()) { ZE2UR_CALL(zeCommandListAppendWaitOnEvents, @@ -1335,9 +1346,11 @@ ur_result_t urCommandBufferAppendUSMPrefetchExp( } // Add the prefetch command to the command-buffer. - // Note that L0 does not handle migration flags. - ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, - (CommandBuffer->ZeComputeCommandList, Mem, Size)); + // TODO Support migration flags after L0 backend support is added. + if (Flags == UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE) { + ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, + (CommandBuffer->ZeComputeCommandList, Mem, Size)); + } if (!CommandBuffer->IsInOrderCmdList) { // Level Zero does not have a completion "event" with the prefetch API, diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index 3b1158645e..107fcc2d1c 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -1265,7 +1265,7 @@ ur_result_t urEnqueueUSMPrefetch( /// [in] size in bytes to be fetched size_t Size, /// [in] USM prefetch flags - ur_usm_migration_flags_t /*Flags*/, + ur_usm_migration_flags_t Flags, /// [in] size of the event wait list uint32_t NumEventsInWaitList, /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of @@ -1276,6 +1276,18 @@ ur_result_t urEnqueueUSMPrefetch( /// [in,out][optional] return an event object that identifies this /// particular command instance. 
ur_event_handle_t *OutEvent) { + switch (Flags) { + case UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE: + break; + case UR_USM_MIGRATION_FLAG_DEVICE_TO_HOST: + UR_LOG(WARN, + "enqueueUSMPrefetch: L0 does not support prefetch to host yet"); + break; + default: + UR_LOG(ERR, "enqueueUSMPrefetch: invalid USM migration flag"); + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + // Lock automatically releases when this goes out of scope. std::scoped_lock lock(Queue->Mutex); @@ -1315,8 +1327,10 @@ ur_result_t urEnqueueUSMPrefetch( ZE2UR_CALL(zeCommandListAppendWaitOnEvents, (ZeCommandList, WaitList.Length, WaitList.ZeEventList)); } - // TODO: figure out how to translate "flags" - ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, (ZeCommandList, Mem, Size)); + // TODO: Support migration flags after L0 backend support is added + if (Flags == UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE) { + ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, (ZeCommandList, Mem, Size)); + } // TODO: Level Zero does not have a completion "event" with the prefetch API, // so manually add command to signal our event. 
diff --git a/source/adapters/level_zero/v2/command_list_manager.cpp b/source/adapters/level_zero/v2/command_list_manager.cpp index 728db1360b..753ad2e0af 100644 --- a/source/adapters/level_zero/v2/command_list_manager.cpp +++ b/source/adapters/level_zero/v2/command_list_manager.cpp @@ -283,11 +283,23 @@ ur_result_t ur_command_list_manager::appendUSMFill( } ur_result_t ur_command_list_manager::appendUSMPrefetch( - const void *pMem, size_t size, ur_usm_migration_flags_t /*flags*/, + const void *pMem, size_t size, ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t phEvent) { TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMPrefetch"); + switch (flags) { + case UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE: + break; + case UR_USM_MIGRATION_FLAG_DEVICE_TO_HOST: + UR_LOG(WARN, + "appendUSMPrefetch: L0v2 does not support prefetch to host yet"); + break; + default: + UR_LOG(ERR, "appendUSMPrefetch: invalid USM migration flag"); + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_PREFETCH); auto [pWaitEvents, numWaitEvents] = getWaitListView(phEventWaitList, numEventsInWaitList); @@ -296,9 +308,11 @@ ur_result_t ur_command_list_manager::appendUSMPrefetch( ZE2UR_CALL(zeCommandListAppendWaitOnEvents, (zeCommandList.get(), numWaitEvents, pWaitEvents)); } - // TODO: figure out how to translate "flags" - ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, - (zeCommandList.get(), pMem, size)); + // TODO: Support migration flags after L0 backend support is added + if (flags == UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE) { + ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, + (zeCommandList.get(), pMem, size)); + } if (zeSignalEvent) { ZE2UR_CALL(zeCommandListAppendSignalEvent, (zeCommandList.get(), zeSignalEvent)); diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index 39d67fff43..7956f048db 100644 --- 
a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -10681,7 +10681,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( const void *pMemory, /// [in] size in bytes to be fetched. size_t size, - /// [in] USM prefetch flags + /// [in] USM migration flags ur_usm_migration_flags_t flags, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index 0cfa916e49..fc335186fc 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -187,6 +187,7 @@ CONSTFIX char CreateBufferWithPropertiesName[] = CONSTFIX char SetKernelArgMemPointerName[] = "clSetKernelArgMemPointerINTEL"; CONSTFIX char EnqueueMemFillName[] = "clEnqueueMemFillINTEL"; CONSTFIX char EnqueueMemcpyName[] = "clEnqueueMemcpyINTEL"; +CONSTFIX char EnqueueMigrateMemName[] = "clEnqueueMigrateMemINTEL"; CONSTFIX char GetMemAllocInfoName[] = "clGetMemAllocInfoINTEL"; CONSTFIX char SetProgramSpecializationConstantName[] = "clSetProgramSpecializationConstant"; diff --git a/source/adapters/opencl/extension_functions.def b/source/adapters/opencl/extension_functions.def index c7b4861807..47e85f918a 100644 --- a/source/adapters/opencl/extension_functions.def +++ b/source/adapters/opencl/extension_functions.def @@ -8,6 +8,7 @@ CL_EXTENSION_FUNC(clMemBlockingFreeINTEL) CL_EXTENSION_FUNC(clSetKernelArgMemPointerINTEL) CL_EXTENSION_FUNC(clEnqueueMemFillINTEL) CL_EXTENSION_FUNC(clEnqueueMemcpyINTEL) +CL_EXTENSION_FUNC(clEnqueueMigrateMemINTEL) CL_EXTENSION_FUNC(clGetMemAllocInfoINTEL) CL_EXTENSION_FUNC(clEnqueueWriteGlobalVariable) CL_EXTENSION_FUNC(clEnqueueReadGlobalVariable) diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index e3c510c745..09cf31aee8 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -524,36 +524,60 @@ UR_APIEXPORT ur_result_t 
UR_APICALL urEnqueueUSMPrefetch( [[maybe_unused]] ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - cl_event Event; + // TODO: Uncomment implementation when issues with impl are resolved. + + // cl_mem_migration_flags MigrationFlag; + switch (flags) { + case UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE: + // Note: currently opencl:cpu will break with this value, but opencl:gpu + // will work just fine. A spec change has been made to address this issue, + // and is waiting to be implemented: + // https://github.com/KhronosGroup/OpenCL-Docs/pull/1412/files#diff-7e4c12789cfc81c40637d32b7113b0cca2c3ee0beabaabb9acd9da743f7b5780R974 + + // MigrationFlag = 0; // 0 (no flag set) migrates to the queue's device + break; + case UR_USM_MIGRATION_FLAG_DEVICE_TO_HOST: + // Note: there is currently no driver support for this. + + // MigrationFlag = CL_MIGRATE_MEM_OBJECT_HOST; + break; + default: + cl_adapter::setErrorMessage("Invalid USM migration flag", + UR_RESULT_ERROR_INVALID_ENUMERATION); + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + /* + // Have to look up the context from the queue + cl_context CLContext = hQueue->Context->CLContext; + + clEnqueueMigrateMemINTEL_fn EnqueueMigrateMem = nullptr; + UR_RETURN_ON_FAILURE( + cl_ext::getExtFuncFromContext( + CLContext, ur::cl::getAdapter()->fnCache.clEnqueueMigrateMemINTELCache, + cl_ext::EnqueueMigrateMemName, &EnqueueMigrateMem)); + */ + + cl_event Event = nullptr; std::vector CLWaitEvents(numEventsInWaitList); for (uint32_t i = 0; i < numEventsInWaitList; i++) { CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } + + /* + CL_RETURN_ON_FAILURE(EnqueueMigrateMem( + hQueue->CLQueue, pMem, size, MigrationFlag, numEventsInWaitList, + CLWaitEvents.data(), ifUrEvent(phEvent, Event))); + */ + + // TODO: when issues with impl are fully resolved, delete this and use + // waitlisting from EnqueueMigrateMem instead. 
CL_RETURN_ON_FAILURE(clEnqueueMarkerWithWaitList( hQueue->CLQueue, numEventsInWaitList, CLWaitEvents.data(), ifUrEvent(phEvent, Event))); + UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent)); return UR_RESULT_SUCCESS; - /* - // Use this once impls support it. - // Have to look up the context from the kernel - cl_context CLContext = hQueue->Context; - - clEnqueueMigrateMemINTEL_fn FuncPtr; - ur_result_t Err = cl_ext::getExtFuncFromContext( - CLContext, "clEnqueueMigrateMemINTEL", &FuncPtr); - - ur_result_t RetVal; - if (Err != UR_RESULT_SUCCESS) { - RetVal = Err; - } else { - RetVal = map_cl_error_to_ur( - FuncPtr(hQueue->CLQueue, pMem, size, flags, - numEventsInWaitList, - reinterpret_cast(phEventWaitList), - reinterpret_cast(phEvent))); - } - */ } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index e96e1cbffd..d096d3895c 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -9039,7 +9039,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( const void *pMemory, /// [in] size in bytes to be fetched. size_t size, - /// [in] USM prefetch flags + /// [in] USM migration flags ur_usm_migration_flags_t flags, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 6f33aaa856..82e898fab8 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -9822,7 +9822,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( const void *pMemory, /// [in] size in bytes to be fetched. 
size_t size, - /// [in] USM prefetch flags + /// [in] USM migration flags ur_usm_migration_flags_t flags, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 5c2c3a41af..75ae04bc5a 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -5152,7 +5152,7 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( const void *pMemory, /// [in] size in bytes to be fetched. size_t size, - /// [in] USM prefetch flags + /// [in] USM migration flags ur_usm_migration_flags_t flags, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index a31b639ae5..286df652fe 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -9458,7 +9458,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( const void *pMemory, /// [in] size in bytes to be fetched. size_t size, - /// [in] USM prefetch flags + /// [in] USM migration flags ur_usm_migration_flags_t flags, /// [in] The number of sync points in the provided dependency list. uint32_t numSyncPointsInWaitList, diff --git a/source/ur_api.cpp b/source/ur_api.cpp index da84b7f50f..be7e5662cd 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -8239,7 +8239,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( const void *pMemory, /// [in] size in bytes to be fetched. size_t size, - /// [in] USM prefetch flags + /// [in] USM migration flags ur_usm_migration_flags_t flags, /// [in] The number of sync points in the provided dependency list. 
uint32_t numSyncPointsInWaitList, diff --git a/test/conformance/enqueue/urEnqueueUSMPrefetch.cpp b/test/conformance/enqueue/urEnqueueUSMPrefetch.cpp index e0cb371ff0..88ef85cd93 100644 --- a/test/conformance/enqueue/urEnqueueUSMPrefetch.cpp +++ b/test/conformance/enqueue/urEnqueueUSMPrefetch.cpp @@ -20,7 +20,8 @@ struct urEnqueueUSMPrefetchWithParamTest UUR_DEVICE_TEST_SUITE_WITH_PARAM( urEnqueueUSMPrefetchWithParamTest, - ::testing::Values(UR_USM_MIGRATION_FLAG_DEFAULT), + ::testing::Values(UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE, + UR_USM_MIGRATION_FLAG_DEVICE_TO_HOST), uur::deviceTestWithParamPrinter); TEST_P(urEnqueueUSMPrefetchWithParamTest, Success) { @@ -102,14 +103,14 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE(urEnqueueUSMPrefetchTest); TEST_P(urEnqueueUSMPrefetchTest, InvalidNullHandleQueue) { ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, urEnqueueUSMPrefetch(nullptr, ptr, allocation_size, - UR_USM_MIGRATION_FLAG_DEFAULT, 0, + UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE, 0, nullptr, nullptr)); } TEST_P(urEnqueueUSMPrefetchTest, InvalidNullPointerMem) { ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, urEnqueueUSMPrefetch(queue, nullptr, allocation_size, - UR_USM_MIGRATION_FLAG_DEFAULT, 0, + UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE, 0, nullptr, nullptr)); } @@ -123,7 +124,7 @@ TEST_P(urEnqueueUSMPrefetchTest, InvalidEnumeration) { TEST_P(urEnqueueUSMPrefetchTest, InvalidSizeZero) { ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_SIZE, urEnqueueUSMPrefetch(queue, ptr, 0, - UR_USM_MIGRATION_FLAG_DEFAULT, 0, + UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE, 0, nullptr, nullptr)); } @@ -132,14 +133,14 @@ TEST_P(urEnqueueUSMPrefetchTest, InvalidSizeTooLarge) { ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_SIZE, urEnqueueUSMPrefetch(queue, ptr, allocation_size * 2, - UR_USM_MIGRATION_FLAG_DEFAULT, 0, + UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE, 0, nullptr, nullptr)); } TEST_P(urEnqueueUSMPrefetchTest, InvalidEventWaitList) { ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST, 
urEnqueueUSMPrefetch(queue, ptr, allocation_size, - UR_USM_MIGRATION_FLAG_DEFAULT, 1, + UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE, 1, nullptr, nullptr)); ur_event_handle_t validEvent; @@ -147,12 +148,12 @@ TEST_P(urEnqueueUSMPrefetchTest, InvalidEventWaitList) { ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST, urEnqueueUSMPrefetch(queue, ptr, allocation_size, - UR_USM_MIGRATION_FLAG_DEFAULT, 0, + UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE, 0, &validEvent, nullptr)); ur_event_handle_t inv_evt = nullptr; ASSERT_EQ_RESULT(urEnqueueUSMPrefetch(queue, ptr, allocation_size, - UR_USM_MIGRATION_FLAG_DEFAULT, 1, + UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE, 1, &inv_evt, nullptr), UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST); diff --git a/test/conformance/exp_command_buffer/commands.cpp b/test/conformance/exp_command_buffer/commands.cpp index 22ac628c27..4e0ced3502 100644 --- a/test/conformance/exp_command_buffer/commands.cpp +++ b/test/conformance/exp_command_buffer/commands.cpp @@ -143,8 +143,21 @@ TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendUSMPrefetchExp) { UUR_KNOWN_FAILURE_ON(uur::OpenCL{}); ASSERT_SUCCESS(urCommandBufferAppendUSMPrefetchExp( - cmd_buf_handle, device_ptrs[0], allocation_size, 0, 0, nullptr, 0, - nullptr, nullptr, nullptr, nullptr)); + cmd_buf_handle, device_ptrs[0], allocation_size, + UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE, 0, nullptr, 0, nullptr, nullptr, + nullptr, nullptr)); +} + +TEST_P(urCommandBufferCommandsTest, + urCommandBufferAppendUSMPrefetchExpDeviceToHost) { + // No Prefetch command in cl_khr_command_buffer + // No driver support for prefetching from device to host on Intel GPUs + UUR_KNOWN_FAILURE_ON(uur::OpenCL{}, uur::LevelZero{}); + + ASSERT_SUCCESS(urCommandBufferAppendUSMPrefetchExp( + cmd_buf_handle, device_ptrs[0], allocation_size, + UR_USM_MIGRATION_FLAG_DEVICE_TO_HOST, 0, nullptr, 0, nullptr, nullptr, + nullptr, nullptr)); } TEST_P(urCommandBufferCommandsTest, urCommandBufferAppendUSMAdviseExp) { diff --git 
a/test/conformance/exp_command_buffer/event_sync.cpp b/test/conformance/exp_command_buffer/event_sync.cpp index ba59205387..26ec26b2a0 100644 --- a/test/conformance/exp_command_buffer/event_sync.cpp +++ b/test/conformance/exp_command_buffer/event_sync.cpp @@ -426,9 +426,9 @@ TEST_P(CommandEventSyncTest, USMPrefetchExp) { // Test prefetch command waiting on queue event ASSERT_SUCCESS(urCommandBufferAppendUSMPrefetchExp( - cmd_buf_handle, device_ptrs[1], allocation_size, 0 /* migration flags*/, - 0, nullptr, 1, &external_events[0], nullptr, &external_events[1], - nullptr)); + cmd_buf_handle, device_ptrs[1], allocation_size, + UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE, 0, nullptr, 1, &external_events[0], + nullptr, &external_events[1], nullptr)); ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle)); ASSERT_SUCCESS( urEnqueueCommandBufferExp(queue, cmd_buf_handle, 0, nullptr, nullptr)); diff --git a/test/conformance/exp_command_buffer/in-order.cpp b/test/conformance/exp_command_buffer/in-order.cpp index fd6335197c..45357340ed 100644 --- a/test/conformance/exp_command_buffer/in-order.cpp +++ b/test/conformance/exp_command_buffer/in-order.cpp @@ -101,7 +101,7 @@ struct urInOrderUSMCommandBufferExpTest : urInOrderCommandBufferExpTest { if (hints) { ASSERT_SUCCESS(urCommandBufferAppendUSMPrefetchExp( in_order_cb, device_ptrs[0], allocation_size, - UR_USM_MIGRATION_FLAG_DEFAULT, 0, nullptr, 0, nullptr, nullptr, + UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE, 0, nullptr, 0, nullptr, nullptr, nullptr, nullptr)); } @@ -124,7 +124,7 @@ struct urInOrderUSMCommandBufferExpTest : urInOrderCommandBufferExpTest { if (hints) { ASSERT_SUCCESS(urCommandBufferAppendUSMPrefetchExp( in_order_cb, device_ptrs[0], allocation_size, - UR_USM_MIGRATION_FLAG_DEFAULT, 0, nullptr, 0, nullptr, nullptr, + UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE, 0, nullptr, 0, nullptr, nullptr, nullptr, nullptr)); } diff --git a/test/conformance/exp_command_buffer/update/event_sync.cpp 
b/test/conformance/exp_command_buffer/update/event_sync.cpp index fe0dc03728..16763eaf0c 100644 --- a/test/conformance/exp_command_buffer/update/event_sync.cpp +++ b/test/conformance/exp_command_buffer/update/event_sync.cpp @@ -723,8 +723,8 @@ TEST_P(CommandEventSyncUpdateTest, USMPrefetchExp) { // Test prefetch command waiting on queue event ASSERT_SUCCESS(urCommandBufferAppendUSMPrefetchExp( updatable_cmd_buf_handle, device_ptrs[1], allocation_size, - 0 /* migration flags*/, 0, nullptr, 1, &external_events[0], nullptr, - &external_events[1], &command_handles[0])); + UR_USM_MIGRATION_FLAG_HOST_TO_DEVICE, 0, nullptr, 1, &external_events[0], + nullptr, &external_events[1], &command_handles[0])); ASSERT_NE(nullptr, command_handles[0]); ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); ASSERT_SUCCESS(urEnqueueCommandBufferExp(queue, updatable_cmd_buf_handle, 0,