Skip to content

Commit 832baeb

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web' (5 commits)
2 parents c4808e3 + 284e9f0 commit 832baeb

File tree

9 files changed

+93
-80
lines changed

9 files changed

+93
-80
lines changed

llvm/lib/SYCLLowerIR/CMakeLists.txt

Lines changed: 20 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,28 @@
11
# Lowering of SYCL ESIMD kernels depends on vc-intrinsics
22
# NOTE: the LLVMGenXIntrinsics target may already have been added earlier from llvm/projects
33
if (NOT TARGET LLVMGenXIntrinsics)
4-
if(NOT DEFINED LLVMGenXIntrinsics_SOURCE_DIR AND NOT LLVMGenXIntrinsics_INCLUDE_DIR)
5-
find_package(LLVMGenXIntrinsics QUIET)
6-
endif()
4+
find_package(LLVMGenXIntrinsics QUIET)
75

8-
if (NOT LLVMGenXIntrinsics_FOUND)
9-
if (NOT DEFINED LLVMGenXIntrinsics_SOURCE_DIR)
10-
find_path(LLVMGenXIntrinsics_INCLUDE_DIR GenXIntrinsics/include PATHS ${CMAKE_PREFIX_PATH})
6+
if (NOT LLVMGenXIntrinsics_FOUND)
7+
set(LLVMGenXIntrinsics_GIT_REPO https://github.com/intel/vc-intrinsics.git)
118

12-
if (NOT LLVMGenXIntrinsics_INCLUDE_DIR)
13-
set(LLVMGenXIntrinsics_GIT_REPO https://github.com/intel/vc-intrinsics.git)
14-
15-
# Date: May 29, 2025
16-
# Use OneNthEltsVecArgument instead of HalfVecArguments to fix build failure.
17-
set(LLVMGenXIntrinsics_GIT_TAG 60cea7590bd022d95f5cf336ee765033bd114d69)
18-
19-
message(STATUS "vc-intrinsics repo is missing. Will try to download it from ${LLVMGenXIntrinsics_GIT_REPO}")
20-
include(FetchContent)
21-
FetchContent_Declare(vc-intrinsics
22-
GIT_REPOSITORY ${LLVMGenXIntrinsics_GIT_REPO}
23-
GIT_TAG ${LLVMGenXIntrinsics_GIT_TAG}
24-
)
25-
FetchContent_MakeAvailable(vc-intrinsics)
26-
FetchContent_GetProperties(vc-intrinsics)
27-
28-
set(LLVMGenXIntrinsics_SOURCE_DIR ${vc-intrinsics_SOURCE_DIR})
29-
set(LLVMGenXIntrinsics_BINARY_DIR ${vc-intrinsics_BINARY_DIR})
30-
else()
31-
message(STATUS "vc-intrinsics found in system at ${LLVMGenXIntrinsics_INCLUDE_DIR}")
32-
set(LLVMGenXIntrinsics_SOURCE_DIR ${LLVMGenXIntrinsics_INCLUDE_DIR}/..)
33-
set(LLVMGenXIntrinsics_BINARY_DIR ${CMAKE_BINARY_DIR}/vc-intrinsics-build)
34-
add_subdirectory(${LLVMGenXIntrinsics_SOURCE_DIR} ${LLVMGenXIntrinsics_BINARY_DIR})
35-
endif()
36-
else (NOT DEFINED LLVMGenXIntrinsics_SOURCE_DIR)
37-
# -DLLVMGenXIntrinsics_SOURCE_DIR is provided
38-
message(STATUS "vc-intrinsics are added manually ${LLVMGenXIntrinsics_SOURCE_DIR}")
39-
40-
set(LLVMGenXIntrinsics_BINARY_DIR ${CMAKE_BINARY_DIR}/vc-intrinsics-build)
41-
add_subdirectory(${LLVMGenXIntrinsics_SOURCE_DIR} ${LLVMGenXIntrinsics_BINARY_DIR})
42-
endif (NOT DEFINED LLVMGenXIntrinsics_SOURCE_DIR)
43-
target_include_directories(LLVMGenXIntrinsics
44-
PUBLIC $<BUILD_INTERFACE:${LLVMGenXIntrinsics_SOURCE_DIR}/GenXIntrinsics/include>
45-
PUBLIC $<BUILD_INTERFACE:${LLVMGenXIntrinsics_BINARY_DIR}/GenXIntrinsics/include>
46-
)
47-
endif()
9+
# Date: May 29, 2025
10+
# Use OneNthEltsVecArgument instead of HalfVecArguments to fix build failure.
11+
set(LLVMGenXIntrinsics_GIT_TAG 60cea7590bd022d95f5cf336ee765033bd114d69)
12+
if(NOT FETCHCONTENT_SOURCE_DIR_VC-INTRINSICS)
13+
message(STATUS "vc-intrinsics repo is missing. Will try to download "
14+
"${LLVMGenXIntrinsics_GIT_TAG} from ${LLVMGenXIntrinsics_GIT_REPO}")
15+
endif()
16+
include(FetchContent)
17+
FetchContent_Declare(vc-intrinsics
18+
GIT_REPOSITORY ${LLVMGenXIntrinsics_GIT_REPO}
19+
GIT_TAG ${LLVMGenXIntrinsics_GIT_TAG}
20+
)
21+
FetchContent_MakeAvailable(vc-intrinsics)
22+
FetchContent_GetProperties(vc-intrinsics)
23+
else()
24+
message(STATUS "vc-intrinsics found in system at ${LLVMGenXIntrinsics_DIR}")
25+
endif()
4826
endif (NOT TARGET LLVMGenXIntrinsics)
4927

5028
add_llvm_component_library(LLVMSYCLLowerIR
@@ -88,8 +66,6 @@ add_llvm_component_library(LLVMSYCLLowerIR
8866

8967
ADDITIONAL_HEADER_DIRS
9068
${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR
91-
${LLVM_MAIN_SRC_DIR}/projects/vc-intrinsics/GenXIntrinsics/include
92-
${LLVM_BINARY_DIR}/projects/vc-intrinsics/GenXIntrinsics/include
9369

9470
DEPENDS
9571
intrinsics_gen
@@ -114,10 +90,6 @@ add_llvm_component_library(LLVMSYCLLowerIR
11490
ipo
11591
)
11692

117-
target_include_directories(LLVMSYCLLowerIR
118-
PUBLIC $<BUILD_INTERFACE:${LLVMGenXIntrinsics_SOURCE_DIR}/GenXIntrinsics/include/>
119-
PUBLIC $<BUILD_INTERFACE:${LLVMGenXIntrinsics_BINARY_DIR}>
120-
)
12193
target_link_libraries(LLVMSYCLLowerIR
12294
PUBLIC LLVMGenXIntrinsics
12395
)

sycl/test-e2e/Matrix/SG32/joint_matrix_half.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ int main() {
3333
matrix_combinations>();
3434

3535
for (unsigned int i = 0; i < combinations.size(); i++) {
36+
if (combinations[i].atype != matrix_type::fp16)
37+
continue;
38+
3639
if (combinations[i].nsize == 0) { // Intel AMX
3740
test<half, float, float, /*TM*/ 16, /*TN*/ 16, /*TK*/ 16,
3841
layout::ext_intel_packed, 2>();

sycl/test-e2e/Matrix/joint_matrix_half.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ int main() {
2424
matrix_combinations>();
2525

2626
for (unsigned int i = 0; i < combinations.size(); i++) {
27+
if (combinations[i].atype != matrix_type::fp16)
28+
continue;
29+
2730
if (combinations[i].nsize == 0) { // Intel AMX
2831
test<half, float, float, /*TM*/ 16, /*TN*/ 16, /*TK*/ 16,
2932
layout::ext_intel_packed, 2>();

unified-runtime/source/adapters/offload/device.cpp

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
8585
case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL:
8686
// TODO: Implement subgroups in Offload
8787
return ReturnValue(1);
88+
case UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE:
89+
if (pPropSizeRet) {
90+
*pPropSizeRet = sizeof(size_t);
91+
}
92+
93+
if (pPropValue) {
94+
uint32_t as32;
95+
OL_RETURN_ON_ERR(olGetDeviceInfo(hDevice->OffloadDevice,
96+
OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE,
97+
sizeof(as32), &as32));
98+
99+
*reinterpret_cast<size_t *>(pPropValue) = as32;
100+
}
101+
102+
return UR_RESULT_SUCCESS;
88103
case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: {
89104
// OL dimensions are uint32_t while UR expects size_t, so each component is copied over individually
90105
if (pPropSizeRet) {
@@ -94,9 +109,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
94109
if (pPropValue) {
95110
ol_dimensions_t olVec;
96111
size_t *urVec = reinterpret_cast<size_t *>(pPropValue);
97-
OL_RETURN_ON_ERR(olGetDeviceInfo(hDevice->OffloadDevice,
98-
OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE,
99-
sizeof(olVec), &olVec));
112+
OL_RETURN_ON_ERR(
113+
olGetDeviceInfo(hDevice->OffloadDevice,
114+
OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION,
115+
sizeof(olVec), &olVec));
100116

101117
urVec[0] = olVec.x;
102118
urVec[1] = olVec.y;

unified-runtime/source/loader/layers/sanitizer/tsan/tsan_buffer.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,8 +186,10 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
186186
}
187187

188188
ur_result_t MemBuffer::free() {
189-
for (const auto &[_, Ptr] : Allocations) {
190-
ur_result_t URes = getTsanInterceptor()->releaseMemory(Context, Ptr);
189+
for (const auto &[Device, Ptr] : Allocations) {
190+
ur_result_t URes = Device
191+
? getTsanInterceptor()->releaseMemory(Context, Ptr)
192+
: getContext()->urDdiTable.USM.pfnFree(Context, Ptr);
191193
if (URes != UR_RESULT_SUCCESS) {
192194
UR_LOG_L(getContext()->logger, ERR, "Failed to free buffer handle {}",
193195
(void *)Ptr);

unified-runtime/source/loader/layers/sanitizer/tsan/tsan_interceptor.cpp

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ ur_result_t DeviceInfo::allocShadowMemory() {
101101
return UR_RESULT_SUCCESS;
102102
}
103103

104-
void ContextInfo::insertAllocInfo(TsanAllocInfo AI) {
104+
void DeviceInfo::insertAllocInfo(TsanAllocInfo AI) {
105105
std::scoped_lock<ur_shared_mutex> Guard(AllocInfosMutex);
106106
AllocInfos.insert(std::move(AI));
107107
}
@@ -153,7 +153,15 @@ ur_result_t TsanInterceptor::allocateMemory(ur_context_handle_t Context,
153153

154154
auto AI = TsanAllocInfo{reinterpret_cast<uptr>(Allocated), Size};
155155
// For updating shadow memory
156-
CI->insertAllocInfo(std::move(AI));
156+
if (Device) {
157+
auto DI = getDeviceInfo(Device);
158+
DI->insertAllocInfo(std::move(AI));
159+
} else {
160+
for (const auto &Device : CI->DeviceList) {
161+
auto DI = getDeviceInfo(Device);
162+
DI->insertAllocInfo(AI);
163+
}
164+
}
157165

158166
*ResultPtr = Allocated;
159167
return UR_RESULT_SUCCESS;
@@ -163,11 +171,14 @@ ur_result_t TsanInterceptor::releaseMemory(ur_context_handle_t Context,
163171
void *Ptr) {
164172
auto CI = getContextInfo(Context);
165173
auto Addr = reinterpret_cast<uptr>(Ptr);
166-
{
167-
std::scoped_lock<ur_shared_mutex> Guard(CI->AllocInfosMutex);
168-
auto It = std::find_if(CI->AllocInfos.begin(), CI->AllocInfos.end(),
174+
175+
for (const auto &Device : CI->DeviceList) {
176+
auto DI = getDeviceInfo(Device);
177+
std::scoped_lock<ur_shared_mutex> Guard(DI->AllocInfosMutex);
178+
auto It = std::find_if(DI->AllocInfos.begin(), DI->AllocInfos.end(),
169179
[&](auto &P) { return P.AllocBegin == Addr; });
170-
CI->AllocInfos.erase(It);
180+
if (It != DI->AllocInfos.end())
181+
DI->AllocInfos.erase(It);
171182
}
172183

173184
UR_CALL(getContext()->urDdiTable.USM.pfnFree(Context, Ptr));
@@ -343,7 +354,7 @@ ur_result_t TsanInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,
343354

344355
UR_CALL(prepareLaunch(CI, DI, InternalQueue, Kernel, LaunchInfo));
345356

346-
UR_CALL(updateShadowMemory(CI, DI, Kernel, InternalQueue));
357+
UR_CALL(updateShadowMemory(DI, Kernel, InternalQueue));
347358

348359
UR_CALL(getContext()->urDdiTable.Queue.pfnFinish(InternalQueue));
349360

@@ -470,12 +481,12 @@ ur_result_t TsanInterceptor::prepareLaunch(std::shared_ptr<ContextInfo> &,
470481
return UR_RESULT_SUCCESS;
471482
}
472483

473-
ur_result_t TsanInterceptor::updateShadowMemory(
474-
std::shared_ptr<ContextInfo> &CI, std::shared_ptr<DeviceInfo> &DI,
475-
ur_kernel_handle_t Kernel, ur_queue_handle_t Queue) {
484+
ur_result_t TsanInterceptor::updateShadowMemory(std::shared_ptr<DeviceInfo> &DI,
485+
ur_kernel_handle_t Kernel,
486+
ur_queue_handle_t Queue) {
476487
auto &PI = getProgramInfo(GetProgram(Kernel));
477-
std::scoped_lock<ur_shared_mutex> Guard(CI->AllocInfosMutex);
478-
for (auto &AllocInfo : CI->AllocInfos) {
488+
std::scoped_lock<ur_shared_mutex> Guard(DI->AllocInfosMutex);
489+
for (auto &AllocInfo : DI->AllocInfos) {
479490
UR_CALL(DI->Shadow->CleanShadow(Queue, AllocInfo.AllocBegin,
480491
AllocInfo.AllocSize));
481492
}

unified-runtime/source/loader/layers/sanitizer/tsan/tsan_interceptor.hpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,14 @@ struct DeviceInfo {
4444

4545
std::shared_ptr<ShadowMemory> Shadow;
4646

47+
ur_shared_mutex AllocInfosMutex;
48+
std::set<TsanAllocInfo> AllocInfos;
49+
4750
explicit DeviceInfo(ur_device_handle_t Device) : Handle(Device) {}
4851

4952
ur_result_t allocShadowMemory();
53+
54+
void insertAllocInfo(TsanAllocInfo AI);
5055
};
5156

5257
struct ContextInfo {
@@ -56,9 +61,6 @@ struct ContextInfo {
5661

5762
std::vector<ur_device_handle_t> DeviceList;
5863

59-
ur_shared_mutex AllocInfosMutex;
60-
std::set<TsanAllocInfo> AllocInfos;
61-
6264
ur_shared_mutex InternalQueueMapMutex;
6365
std::unordered_map<ur_device_handle_t, std::optional<ManagedQueue>>
6466
InternalQueueMap;
@@ -80,8 +82,6 @@ struct ContextInfo {
8082

8183
ContextInfo &operator=(const ContextInfo &) = delete;
8284

83-
void insertAllocInfo(TsanAllocInfo AI);
84-
8585
ur_queue_handle_t getInternalQueue(ur_device_handle_t);
8686
};
8787

@@ -297,8 +297,7 @@ class TsanInterceptor {
297297
ur_shared_mutex KernelLaunchMutex;
298298

299299
private:
300-
ur_result_t updateShadowMemory(std::shared_ptr<ContextInfo> &CI,
301-
std::shared_ptr<DeviceInfo> &DI,
300+
ur_result_t updateShadowMemory(std::shared_ptr<DeviceInfo> &DI,
302301
ur_kernel_handle_t Kernel,
303302
ur_queue_handle_t Queue);
304303

unified-runtime/test/adapters/level_zero/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (C) 2024 Intel Corporation
1+
# Copyright (C) 2024-2025 Intel Corporation
22
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
33
# See LICENSE.TXT
44
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -39,7 +39,7 @@ if(NOT UR_FOUND_DPCXX)
3939
# Tests that require kernels can't be used if we aren't generating
4040
# device binaries
4141
message(WARNING
42-
"UR_DPCXX is not defined, skipping some adapter tests for ${adapter}")
42+
"UR_DPCXX is not defined, skipping kernels' tests for L0")
4343
else()
4444
add_conformance_kernels_test(link urProgramLink.cpp)
4545
add_l0_loader_kernels_test(kernel_create urKernelCreateWithNativeHandle.cpp)

unified-runtime/test/adapters/level_zero/v2/CMakeLists.txt

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (C) 2024 Intel Corporation
1+
# Copyright (C) 2024-2025 Intel Corporation
22
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
33
# See LICENSE.TXT
44
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
@@ -67,8 +67,15 @@ add_l0_v2_devices_test(memory_residency
6767
${PROJECT_SOURCE_DIR}/source/adapters/level_zero/ur_level_zero.cpp
6868
)
6969

70-
add_l0_v2_kernels_test(deferred_kernel
71-
deferred_kernel.cpp
72-
${PROJECT_SOURCE_DIR}/source/adapters/level_zero/common.cpp
73-
${PROJECT_SOURCE_DIR}/source/adapters/level_zero/ur_level_zero.cpp
74-
)
70+
if(NOT UR_FOUND_DPCXX)
71+
# Tests that require kernels can't be used if we aren't generating
72+
# device binaries
73+
message(WARNING
74+
"UR_DPCXX is not defined, skipping kernels' tests for L0v2")
75+
else()
76+
add_l0_v2_kernels_test(deferred_kernel
77+
deferred_kernel.cpp
78+
${PROJECT_SOURCE_DIR}/source/adapters/level_zero/common.cpp
79+
${PROJECT_SOURCE_DIR}/source/adapters/level_zero/ur_level_zero.cpp
80+
)
81+
endif()

0 commit comments

Comments
 (0)