diff --git a/.github/intel-llvm-mirror-base-commit b/.github/intel-llvm-mirror-base-commit index e049aa99ec..eb32b8eefe 100644 --- a/.github/intel-llvm-mirror-base-commit +++ b/.github/intel-llvm-mirror-base-commit @@ -1 +1 @@ -542a00b45276bd9a24ba85c041b0d5535a896593 +06407ab5626faccc61fb8367ac1017667045f9e1 diff --git a/source/adapters/offload/device.cpp b/source/adapters/offload/device.cpp index 9990258dae..1c026de279 100644 --- a/source/adapters/offload/device.cpp +++ b/source/adapters/offload/device.cpp @@ -85,6 +85,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: // TODO: Implement subgroups in Offload return ReturnValue(1); + case UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE: + if (pPropSizeRet) { + *pPropSizeRet = sizeof(size_t); + } + + if (pPropValue) { + uint32_t as32; + OL_RETURN_ON_ERR(olGetDeviceInfo(hDevice->OffloadDevice, + OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE, + sizeof(as32), &as32)); + + *reinterpret_cast(pPropValue) = as32; + } + + return UR_RESULT_SUCCESS; case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: { // OL dimensions are uint32_t while UR is size_t, so they need to be mapped if (pPropSizeRet) { @@ -94,9 +109,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, if (pPropValue) { ol_dimensions_t olVec; size_t *urVec = reinterpret_cast(pPropValue); - OL_RETURN_ON_ERR(olGetDeviceInfo(hDevice->OffloadDevice, - OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE, - sizeof(olVec), &olVec)); + OL_RETURN_ON_ERR( + olGetDeviceInfo(hDevice->OffloadDevice, + OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION, + sizeof(olVec), &olVec)); urVec[0] = olVec.x; urVec[1] = olVec.y; diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt index 4f20f4a29e..487c1ff9df 100644 --- a/source/common/CMakeLists.txt +++ b/source/common/CMakeLists.txt @@ -36,26 +36,6 @@ target_include_directories(ur_common PUBLIC $ ) -message(STATUS "Download Unified Memory Framework from github.com") -if (NOT DEFINED UMF_REPO) - set(UMF_REPO "https://github.com/oneapi-src/unified-memory-framework.git") -endif() - -if (NOT DEFINED UMF_TAG) - # commit 1de269c00e46b7cbdbafa2247812c8c4bb4ed4a5 - # Author: Łukasz Stolarczuk - # Date: Mon Jul 21 15:42:59 2025 +0200 - # 1.0.0 release - set(UMF_TAG v1.0.0) -endif() - -message(STATUS "Will fetch Unified Memory Framework from ${UMF_REPO}") - -include(FetchContent) -FetchContent_Declare(unified-memory-framework - GIT_REPOSITORY ${UMF_REPO} - GIT_TAG ${UMF_TAG} -) if (UR_STATIC_ADAPTER_L0) if (UMF_BUILD_SHARED_LIBRARY) @@ -63,14 +43,34 @@ if (UR_STATIC_ADAPTER_L0) set(UMF_BUILD_SHARED_LIBRARY OFF) endif() endif() + +set(UR_USE_EXTERNAL_UMF ON CACHE BOOL "Use a pre-built UMF if available") -set(UR_USE_EXTERNAL_UMF OFF CACHE BOOL "Use a pre-built UMF") - -if (UR_USE_EXTERNAL_UMF) - find_package(umf REQUIRED) +if(UR_USE_EXTERNAL_UMF) + find_package(umf 1.0.0 QUIET) +endif() +if(umf_FOUND) + message(STATUS "Using preinstalled UMF at ${umf_DIR}, ignoring UMF build related options") # Add an alias matching the FetchContent case add_library(umf::headers ALIAS umf::umf_headers) else() + set(UMF_REPO "https://github.com/oneapi-src/unified-memory-framework.git") + + # commit 1de269c00e46b7cbdbafa2247812c8c4bb4ed4a5 + # Author: Łukasz Stolarczuk + # Date: Mon Jul 21 15:42:59 2025 +0200 + # 1.0.0 release + set(UMF_TAG v1.0.0) + + if(NOT FETCHCONTENT_SOURCE_DIR_UNIFIED-MEMORY-FRAMEWORK) + message(STATUS "Will fetch Unified Memory Framework from ${UMF_REPO}") + endif() + + include(FetchContent) + FetchContent_Declare(unified-memory-framework + GIT_REPOSITORY ${UMF_REPO} + GIT_TAG ${UMF_TAG} + ) set(UMF_BUILD_TESTS OFF CACHE INTERNAL "Build UMF tests") set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "Build UMF examples") set(UMF_BUILD_SHARED_LIBRARY ${UMF_BUILD_SHARED_LIBRARY} CACHE INTERNAL "Build UMF shared library") @@ -80,6 +80,10 @@ else() endif() if(UR_ENABLE_LATENCY_HISTOGRAM) + find_package(hdr_histogram QUIET) + if(hdr_histogram_FOUND) + set(hdr_histogram_SOURCE_DIR "${hdr_histogram_DIR}") + else() set(HDR_HISTOGRAM_BUILD_STATIC CACHE INTERNAL ON "") set(HDR_HISTOGRAM_BUILD_SHARED CACHE INTERNAL OFF "") @@ -91,10 +95,10 @@ if(UR_ENABLE_LATENCY_HISTOGRAM) FetchContent_MakeAvailable(hdr_histogram) FetchContent_GetProperties(hdr_histogram) - - target_link_libraries(ur_common PUBLIC hdr_histogram_static) - target_include_directories(ur_common PUBLIC $) - target_compile_options(ur_common PUBLIC -DUR_ENABLE_LATENCY_HISTOGRAM=1) + endif() + target_link_libraries(ur_common PUBLIC hdr_histogram_static) + target_include_directories(ur_common PUBLIC $) + target_compile_options(ur_common PUBLIC -DUR_ENABLE_LATENCY_HISTOGRAM=1) endif() target_link_libraries(ur_common PUBLIC diff --git a/source/loader/layers/sanitizer/tsan/tsan_buffer.cpp b/source/loader/layers/sanitizer/tsan/tsan_buffer.cpp index c42a39d7cc..d95d1c6409 100644 --- a/source/loader/layers/sanitizer/tsan/tsan_buffer.cpp +++ b/source/loader/layers/sanitizer/tsan/tsan_buffer.cpp @@ -186,8 +186,10 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) { } ur_result_t MemBuffer::free() { - for (const auto &[_, Ptr] : Allocations) { - ur_result_t URes = getTsanInterceptor()->releaseMemory(Context, Ptr); + for (const auto &[Device, Ptr] : Allocations) { + ur_result_t URes = Device + ? getTsanInterceptor()->releaseMemory(Context, Ptr) + : getContext()->urDdiTable.USM.pfnFree(Context, Ptr); if (URes != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, ERR, "Failed to free buffer handle {}", (void *)Ptr); diff --git a/source/loader/layers/sanitizer/tsan/tsan_interceptor.cpp b/source/loader/layers/sanitizer/tsan/tsan_interceptor.cpp index 86c12f9e89..3f9248489f 100644 --- a/source/loader/layers/sanitizer/tsan/tsan_interceptor.cpp +++ b/source/loader/layers/sanitizer/tsan/tsan_interceptor.cpp @@ -101,7 +101,7 @@ ur_result_t DeviceInfo::allocShadowMemory() { return UR_RESULT_SUCCESS; } -void ContextInfo::insertAllocInfo(TsanAllocInfo AI) { +void DeviceInfo::insertAllocInfo(TsanAllocInfo AI) { std::scoped_lock Guard(AllocInfosMutex); AllocInfos.insert(std::move(AI)); } @@ -153,7 +153,15 @@ ur_result_t TsanInterceptor::allocateMemory(ur_context_handle_t Context, auto AI = TsanAllocInfo{reinterpret_cast(Allocated), Size}; // For updating shadow memory - CI->insertAllocInfo(std::move(AI)); + if (Device) { + auto DI = getDeviceInfo(Device); + DI->insertAllocInfo(std::move(AI)); + } else { + for (const auto &Device : CI->DeviceList) { + auto DI = getDeviceInfo(Device); + DI->insertAllocInfo(AI); + } + } *ResultPtr = Allocated; return UR_RESULT_SUCCESS; @@ -163,11 +171,14 @@ ur_result_t TsanInterceptor::releaseMemory(ur_context_handle_t Context, void *Ptr) { auto CI = getContextInfo(Context); auto Addr = reinterpret_cast(Ptr); - { - std::scoped_lock Guard(CI->AllocInfosMutex); - auto It = std::find_if(CI->AllocInfos.begin(), CI->AllocInfos.end(), + + for (const auto &Device : CI->DeviceList) { + auto DI = getDeviceInfo(Device); + std::scoped_lock Guard(DI->AllocInfosMutex); + auto It = std::find_if(DI->AllocInfos.begin(), DI->AllocInfos.end(), [&](auto &P) { return P.AllocBegin == Addr; }); - CI->AllocInfos.erase(It); + if (It != DI->AllocInfos.end()) + DI->AllocInfos.erase(It); } UR_CALL(getContext()->urDdiTable.USM.pfnFree(Context, Ptr)); @@ -343,7 +354,7 @@ ur_result_t TsanInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel, UR_CALL(prepareLaunch(CI, DI, InternalQueue, Kernel, LaunchInfo)); - UR_CALL(updateShadowMemory(CI, DI, Kernel, InternalQueue)); + UR_CALL(updateShadowMemory(DI, Kernel, InternalQueue)); UR_CALL(getContext()->urDdiTable.Queue.pfnFinish(InternalQueue)); @@ -470,12 +481,12 @@ ur_result_t TsanInterceptor::prepareLaunch(std::shared_ptr &, return UR_RESULT_SUCCESS; } -ur_result_t TsanInterceptor::updateShadowMemory( - std::shared_ptr &CI, std::shared_ptr &DI, - ur_kernel_handle_t Kernel, ur_queue_handle_t Queue) { +ur_result_t TsanInterceptor::updateShadowMemory(std::shared_ptr &DI, + ur_kernel_handle_t Kernel, + ur_queue_handle_t Queue) { auto &PI = getProgramInfo(GetProgram(Kernel)); - std::scoped_lock Guard(CI->AllocInfosMutex); - for (auto &AllocInfo : CI->AllocInfos) { + std::scoped_lock Guard(DI->AllocInfosMutex); + for (auto &AllocInfo : DI->AllocInfos) { UR_CALL(DI->Shadow->CleanShadow(Queue, AllocInfo.AllocBegin, AllocInfo.AllocSize)); } diff --git a/source/loader/layers/sanitizer/tsan/tsan_interceptor.hpp b/source/loader/layers/sanitizer/tsan/tsan_interceptor.hpp index e700e70294..eefcba4036 100644 --- a/source/loader/layers/sanitizer/tsan/tsan_interceptor.hpp +++ b/source/loader/layers/sanitizer/tsan/tsan_interceptor.hpp @@ -44,9 +44,14 @@ struct DeviceInfo { std::shared_ptr Shadow; + ur_shared_mutex AllocInfosMutex; + std::set AllocInfos; + explicit DeviceInfo(ur_device_handle_t Device) : Handle(Device) {} ur_result_t allocShadowMemory(); + + void insertAllocInfo(TsanAllocInfo AI); }; struct ContextInfo { @@ -56,9 +61,6 @@ struct ContextInfo { std::vector DeviceList; - ur_shared_mutex AllocInfosMutex; - std::set AllocInfos; - ur_shared_mutex InternalQueueMapMutex; std::unordered_map> InternalQueueMap; @@ -80,8 +82,6 @@ struct ContextInfo { ContextInfo &operator=(const ContextInfo &) = delete; - void insertAllocInfo(TsanAllocInfo AI); - ur_queue_handle_t getInternalQueue(ur_device_handle_t); }; @@ -297,8 +297,7 @@ class TsanInterceptor { ur_shared_mutex KernelLaunchMutex; private: - ur_result_t updateShadowMemory(std::shared_ptr &CI, - std::shared_ptr &DI, + ur_result_t updateShadowMemory(std::shared_ptr &DI, ur_kernel_handle_t Kernel, ur_queue_handle_t Queue); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 431b46d785..8ab7543fba 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -3,12 +3,23 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -include(FetchContent) -FetchContent_Declare( - googletest - GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG v1.13.0 -) +set(GTEST_VER 1.13.0) + +find_package(GTest ${GTEST_VER} QUIET) + +if(GTest_FOUND AND NOT TARGET GTest::gmock) + message(WARNING "Found system install of GTest but not GMock. Building GTest and GMock from source") + set(GTest_FOUND FALSE) +endif() + +if(NOT GTest_FOUND) + include(FetchContent) + FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG v${GTEST_VER} + ) +endif() include(FindLit) @@ -22,8 +33,10 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND UR_DPCXX AND UR_TEST_FUZZTESTS) set(UR_FUZZTESTING_ENABLED ON) endif() -set(INSTALL_GTEST OFF) -FetchContent_MakeAvailable(googletest) +if(NOT GTest_FOUND) + set(INSTALL_GTEST OFF) + FetchContent_MakeAvailable(googletest) +endif() enable_testing() # At the time of writing this comment, this is only used for level_zero adapter testing. @@ -112,7 +125,7 @@ function(add_gtest_test name) add_testing_binary(${TEST_TARGET_NAME} ${ARGN}) target_link_libraries(${TEST_TARGET_NAME} PRIVATE - gmock + GTest::gmock GTest::gtest_main) endfunction() diff --git a/test/adapters/level_zero/CMakeLists.txt b/test/adapters/level_zero/CMakeLists.txt index d43ca1cdb5..f0ff0ba890 100644 --- a/test/adapters/level_zero/CMakeLists.txt +++ b/test/adapters/level_zero/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -39,7 +39,7 @@ if(NOT UR_FOUND_DPCXX) # Tests that require kernels can't be used if we aren't generating # device binaries message(WARNING - "UR_DPCXX is not defined, skipping some adapter tests for ${adapter}") + "UR_DPCXX is not defined, skipping kernels' tests for L0") else() add_conformance_kernels_test(link urProgramLink.cpp) add_l0_loader_kernels_test(kernel_create urKernelCreateWithNativeHandle.cpp) diff --git a/test/adapters/level_zero/v2/CMakeLists.txt b/test/adapters/level_zero/v2/CMakeLists.txt index bd57d78459..bc2d0b0966 100644 --- a/test/adapters/level_zero/v2/CMakeLists.txt +++ b/test/adapters/level_zero/v2/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -67,8 +67,15 @@ add_l0_v2_devices_test(memory_residency ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/ur_level_zero.cpp ) -add_l0_v2_kernels_test(deferred_kernel - deferred_kernel.cpp - ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/common.cpp - ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/ur_level_zero.cpp -) +if(NOT UR_FOUND_DPCXX) + # Tests that require kernels can't be used if we aren't generating + # device binaries + message(WARNING + "UR_DPCXX is not defined, skipping kernels' tests for L0v2") +else() + add_l0_v2_kernels_test(deferred_kernel + deferred_kernel.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/common.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/ur_level_zero.cpp + ) +endif()