Skip to content

Commit e4c325e

Browse files
committed
Merge branch 'sycl' into sean/usm-normalized-fix
2 parents bed61a8 + 7b34aee commit e4c325e

File tree

16 files changed

+132
-51
lines changed

16 files changed

+132
-51
lines changed

.github/workflows/sycl-linux-precommit.yml

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,16 @@ jobs:
116116
extra_lit_opts: ${{ matrix.extra_lit_opts }}
117117
env: ${{ matrix.env || '{}' }}
118118

119-
install_igc_driver: ${{ contains(needs.detect_changes.outputs.filters, 'drivers') }}
120-
install_dev_igc_driver: ${{ matrix.use_igc_dev && contains(needs.detect_changes.outputs.filters, 'devigccfg') || 'false' }}
119+
# Do not install drivers on AMD and CUDA runners.
120+
install_igc_driver: |
121+
${{ !contains(matrix.target_devices, 'ext_oneapi_cuda') &&
122+
!contains(matrix.target_devices, 'ext_oneapi_hip') &&
123+
contains(needs.detect_changes.outputs.filters, 'drivers') }}
124+
install_dev_igc_driver: |
125+
${{ !contains(matrix.target_devices, 'ext_oneapi_cuda') &&
126+
!contains(matrix.target_devices, 'ext_oneapi_hip') &&
127+
matrix.use_igc_dev && contains(needs.detect_changes.outputs.filters, 'devigccfg') ||
128+
'false' }}
121129
# Run only if the PR does not have the 'ci-no-devigc' label.
122130
skip_run: ${{matrix.use_igc_dev && contains(github.event.pull_request.labels.*.name, 'ci-no-devigc') || 'false'}}
123131

sycl/cmake/modules/FetchUnifiedRuntime.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ if(SYCL_UR_USE_FETCH_CONTENT)
123123
# Date: Tue Aug 20 16:28:30 2024 +0100
124124
# Merge pull request #1940 from RossBrunton/ross/urcall
125125
# [XPTI] Use `ur.call` rather than `ur` in XPTI
126-
set(UNIFIED_RUNTIME_TAG 0df25e229afd11e5d4052b7dfe0974e5443114b6)
126+
set(UNIFIED_RUNTIME_TAG bcf2244dccdef352afaf4d4520526573876981e3)
127127

128128
set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
129129
# Due to the use of dependentloadflag and no installer for UMF and hwloc we need

sycl/include/sycl/ext/oneapi/experimental/address_cast.hpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ inline namespace _V1 {
1616
namespace ext {
1717
namespace oneapi {
1818
namespace experimental {
19+
namespace detail {
20+
using namespace sycl::detail;
21+
}
1922
// Shorthands for address space names
2023
constexpr inline access::address_space global_space = access::address_space::global_space;
2124
constexpr inline access::address_space local_space = access::address_space::local_space;
@@ -32,6 +35,18 @@ static_address_cast(ElementType *Ptr) {
3235
if constexpr (Space == generic_space) {
3336
// Undecorated raw pointer is in generic AS already, no extra casts needed.
3437
return ret_ty(Ptr);
38+
} else if constexpr (Space == access::address_space::
39+
ext_intel_global_device_space ||
40+
Space ==
41+
access::address_space::ext_intel_global_host_space) {
42+
#ifdef __ENABLE_USM_ADDR_SPACE__
43+
// No SPIR-V intrinsic for this yet.
44+
using raw_type = detail::DecoratedType<ElementType, Space>::type *;
45+
auto CastPtr = (raw_type)(Ptr);
46+
#else
47+
auto CastPtr = sycl::detail::spirv::GenericCastToPtr<global_space>(Ptr);
48+
#endif
49+
return ret_ty(CastPtr);
3550
} else {
3651
auto CastPtr = sycl::detail::spirv::GenericCastToPtr<Space>(Ptr);
3752
return ret_ty(CastPtr);
@@ -60,6 +75,20 @@ dynamic_address_cast(ElementType *Ptr) {
6075
"The extension expects undecorated raw pointers only!");
6176
if constexpr (Space == generic_space) {
6277
return ret_ty(Ptr);
78+
} else if constexpr (Space == access::address_space::
79+
ext_intel_global_device_space ||
80+
Space ==
81+
access::address_space::ext_intel_global_host_space) {
82+
#ifdef __ENABLE_USM_ADDR_SPACE__
83+
static_assert(
84+
Space != access::address_space::ext_intel_global_device_space &&
85+
Space != access::address_space::ext_intel_global_host_space,
86+
"Not supported yet!");
87+
return ret_ty(nullptr);
88+
#else
89+
auto CastPtr = sycl::detail::spirv::GenericCastToPtr<global_space>(Ptr);
90+
return ret_ty(CastPtr);
91+
#endif
6392
} else {
6493
auto CastPtr = sycl::detail::spirv::GenericCastToPtrExplicit<Space>(Ptr);
6594
return ret_ty(CastPtr);

sycl/source/detail/thread_pool.hpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,17 +29,17 @@ class ThreadPool {
2929
std::queue<std::function<void()>> MJobQueue;
3030
std::mutex MJobQueueMutex;
3131
std::condition_variable MDoSmthOrStop;
32-
std::atomic_bool MStop;
32+
bool MStop = false;
3333
std::atomic_uint MJobsInPool;
3434

3535
void worker() {
3636
GlobalHandler::instance().registerSchedulerUsage(/*ModifyCounter*/ false);
3737
std::unique_lock<std::mutex> Lock(MJobQueueMutex);
3838
while (true) {
39-
MDoSmthOrStop.wait(
40-
Lock, [this]() { return !MJobQueue.empty() || MStop.load(); });
39+
MDoSmthOrStop.wait(Lock,
40+
[this]() { return !MJobQueue.empty() || MStop; });
4141

42-
if (MStop.load())
42+
if (MStop)
4343
break;
4444

4545
std::function<void()> Job = std::move(MJobQueue.front());
@@ -57,7 +57,6 @@ class ThreadPool {
5757
void start() {
5858
MLaunchedThreads.reserve(MThreadCount);
5959

60-
MStop.store(false);
6160
MJobsInPool.store(0);
6261

6362
for (size_t Idx = 0; Idx < MThreadCount; ++Idx)
@@ -83,7 +82,10 @@ class ThreadPool {
8382
}
8483

8584
void finishAndWait() {
86-
MStop.store(true);
85+
{
86+
std::lock_guard<std::mutex> Lock(MJobQueueMutex);
87+
MStop = true;
88+
}
8789

8890
MDoSmthOrStop.notify_all();
8991

sycl/test-e2e/BFloat16/bfloat16_example.cpp

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
1-
///
2-
/// Checks a simple case of bfloat16, also employed for AOT library fallback.
3-
///
1+
/// Checks a simple case of bfloat16
42

5-
// CUDA is not compatible with SPIR.
6-
// UNSUPPORTED: cuda
7-
8-
// RUN: %clangxx -fsycl %s -o %t.out
3+
// RUN: %{build} -o %t.out
94
// RUN: %{run} %t.out
105

116
#include "bfloat16_example.hpp"
Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
1-
/* This test checks that get_info checks its aspect and throws an invalid object
2-
error when ext::intel::info::device::free_memory is missing on L0*/
3-
4-
// REQUIRES: gpu, level_zero
1+
/* This test checks that get_info checks its aspect and passes without ZES_ENABLE_SYSMAN=1.*/
2+
// REQUIRES: gpu-intel-dg2, level_zero
53
// RUN: %{build} -o %t.out
64
// RUN: env ZES_ENABLE_SYSMAN=0 %{run} %t.out
75
// Explicitly set 'ZES_ENABLE_SYSMAN=0'. HWLOC initializes this environment
@@ -12,14 +10,14 @@
1210
#include <sycl/detail/core.hpp>
1311
int main() {
1412
sycl::queue q;
15-
bool failed = true;
13+
bool failed = false;
1614
try {
1715
sycl::device d(sycl::default_selector_v);
1816
size_t mem_free = d.get_info<sycl::ext::intel::info::device::free_memory>();
1917
} catch (const sycl::exception &e) {
2018
assert(e.code() == sycl::errc::feature_not_supported);
21-
std::cout << "Expected exception encountered: " << e.what() << std::endl;
22-
failed = false;
19+
std::cout << "UnExpected exception encountered: " << e.what() << std::endl;
20+
failed = true;
2321
}
2422
return failed;
2523
}

sycl/test-e2e/Plugin/enqueue-arg-order-image.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44
// spir-v gen for legacy images at O0 not working
55
// UNSUPPORTED: O0
66

7-
// https://github.com/intel/llvm/issues/11434
8-
// UNSUPPORTED: gpu-intel-dg2
9-
107
// RUN: %{build} -o %t.out
118
// Native images are created with host pointers only with host unified memory
129
// support, enforce it for this test.

sycl/test-e2e/Plugin/level_zero_device_free_mem.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
// https://github.com/intel/llvm/issues/14244
2-
// sycl-ls --verbose shows the "ext_intel_free_memory" aspect only if
3-
// ZES_ENABLE_SYSMAN=1 is set, so this test is missed if it requires
4-
// aspect-ext_intel_free_memory. Since gen9 and get12 don't support this query,
2+
// sycl-ls --verbose shows the "ext_intel_free_memory" aspect if
3+
// ZES_ENABLE_SYSMAN=1 is set, but also is sysman init is supported,
4+
// so this test is missed if it requires aspect-ext_intel_free_memory.
5+
6+
// Since gen9 and get12 don't support this query,
57
// so requiring DG2. There may be more devices in our CI supporting this aspect.
68
// REQUIRES: gpu-intel-dg2
79
// REQUIRES: level_zero, level_zero_dev_kit
@@ -10,9 +12,10 @@
1012
//
1113
// RUN: %{build} %level_zero_options -o %t.out
1214
// RUN: env ZES_ENABLE_SYSMAN=1 %{run} %t.out 2>&1 | FileCheck %s
15+
// RUN: env ZES_ENABLE_SYSMAN=0 %{run} %t.out 2>&1 | FileCheck %s
1316
//
1417
// The test is to check that the free device memory is reported by Level Zero
15-
// backend
18+
// backend both with and without the sysman environment variable.
1619
//
1720
// CHECK: Root-device free memory
1821

sycl/test-e2e/Plugin/level_zero_ext_intel_cslice.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
// REQUIRES: level_zero
22
// REQUIRES: aspect-ext_intel_device_id
3-
// UNSUPPORTED: gpu-intel-pvc-1T
43
// https://github.com/intel/llvm/issues/14826
54
// XFAIL: arch-intel_gpu_pvc
65

sycl/test-e2e/Plugin/level_zero_ext_intel_queue_index.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
// REQUIRES: aspect-ext_intel_device_id
22
// REQUIRES: level_zero
3-
// UNSUPPORTED: gpu-intel-pvc-1T
43

54
// https://github.com/intel/llvm/issues/14826
65
// XFAIL: arch-intel_gpu_pvc

0 commit comments

Comments
 (0)