Skip to content

Commit ed3c60f

Browse files
authored
Merge branch 'sycl' into work_group_memoy_new
2 parents 6affbc3 + dcb975d commit ed3c60f

File tree

15 files changed

+207
-107
lines changed

15 files changed

+207
-107
lines changed

sycl/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,8 @@ set( SYCL_TOOLCHAIN_DEPLOY_COMPONENTS
455455
sycl-headers-extras
456456
sycl
457457
libsycldevice
458-
level-zero-sycl-dev
458+
unified-memory-framework
459+
unified-runtime-loader
459460
${XPTIFW_LIBS}
460461
${SYCL_TOOLCHAIN_DEPS}
461462
)

sycl/cmake/modules/FetchUnifiedRuntime.cmake

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -117,11 +117,13 @@ if(SYCL_UR_USE_FETCH_CONTENT)
117117
endfunction()
118118

119119
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
120-
# commit 568a96aabc6edabe8514ae163aecc64cd5a41878
121-
# Author: Mateusz P. Nowak <[email protected]>
122-
# Date: Tue Oct 15 13:57:26 2024 +0200
123-
# Benchmark updates for faster run and more reliable results (#2164)
124-
set(UNIFIED_RUNTIME_TAG 568a96aabc6edabe8514ae163aecc64cd5a41878)
120+
# commit 80fdb0261d4a3623b05d1795f2f59ace9f718d76
121+
# Merge: 568a96aa 77b6c4c7
122+
# Author: aarongreig <[email protected]>
123+
# Date: Wed Oct 16 13:53:51 2024 +0100
124+
# Merge pull request #2206 from oneapi-src/revert-2138-counter-based-3
125+
# Revert "[L0] Phase 2 of Counter-Based Event Implementation"
126+
set(UNIFIED_RUNTIME_TAG 80fdb0261d4a3623b05d1795f2f59ace9f718d76)
125127

126128
set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
127129
# Due to the use of dependentloadflag and no installer for UMF and hwloc we need
@@ -230,14 +232,10 @@ find_package(Threads REQUIRED)
230232

231233
if(TARGET UnifiedRuntimeLoader)
232234
# Install the UR loader.
233-
# TODO: this is piggy-backing on the existing target component level-zero-sycl-dev
234-
# When UR is moved to its separate repo perhaps we should introduce new component,
235-
# e.g. unified-runtime-sycl-dev.
236-
# See github issue #14598
237235
install(TARGETS ur_loader
238-
LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev
239-
ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev
240-
RUNTIME DESTINATION "bin" COMPONENT level-zero-sycl-dev
236+
LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT unified-runtime-loader
237+
ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT unified-runtime-loader
238+
RUNTIME DESTINATION "bin" COMPONENT unified-runtime-loader
241239
)
242240
endif()
243241

@@ -301,8 +299,7 @@ if("native_cpu" IN_LIST SYCL_ENABLE_BACKENDS)
301299
endif()
302300
endif()
303301

304-
# TODO: this is piggy-backing on the existing target component level-zero-sycl-dev
305302
install(TARGETS umf
306-
LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev
307-
ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT level-zero-sycl-dev
308-
RUNTIME DESTINATION "bin" COMPONENT level-zero-sycl-dev)
303+
LIBRARY DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT unified-memory-framework
304+
ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT unified-memory-framework
305+
RUNTIME DESTINATION "bin" COMPONENT unified-memory-framework)

sycl/include/sycl/ext/oneapi/memcpy2d.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ template <typename T, typename>
1818
void handler::ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src,
1919
size_t SrcPitch, size_t Width,
2020
size_t Height) {
21+
#ifndef __SYCL_DEVICE_ONLY__
2122
throwIfGraphAssociated<
2223
ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
2324
sycl_ext_oneapi_memcpy2d>();
@@ -30,6 +31,7 @@ void handler::ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src,
3031
throw sycl::exception(sycl::make_error_code(errc::invalid),
3132
"Source pitch must be greater than or equal "
3233
"to the width specified in 'ext_oneapi_memcpy2d'");
34+
#endif
3335

3436
// Get the type of the pointers.
3537
context Ctx = detail::createSyclObjFromImpl<context>(getContextImplPtr());

sycl/include/sycl/handler.hpp

Lines changed: 106 additions & 75 deletions
Large diffs are not rendered by default.

sycl/include/syclcompat/memory.hpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -644,6 +644,9 @@ static inline void wait_and_free(void *ptr,
644644
}
645645
}
646646

647+
// Anonymous namespace to disable ADL for functions which might clash (memcpy,
648+
// memset, free)
649+
namespace {
647650
/// Free the memory \p ptr on the default queue without synchronizing
648651
/// \param ptr Point to free.
649652
/// \returns no return value.
@@ -652,6 +655,7 @@ static inline void free(void *ptr, sycl::queue q = get_default_queue()) {
652655
sycl::free(ptr, q);
653656
}
654657
}
658+
} // namespace
655659

656660
/// Enqueues the release of all pointers in /p pointers on the /p q.
657661
/// The command waits on all passed /p events and returns an event that
@@ -677,6 +681,7 @@ inline sycl::event enqueue_free(const std::vector<void *> &pointers,
677681
return event;
678682
}
679683

684+
namespace {
680685
/// Synchronously copies \p size bytes from the address specified by \p from_ptr
681686
/// to the address specified by \p to_ptr. The function will
682687
/// return after the copy is completed.
@@ -691,6 +696,8 @@ static void memcpy(void *to_ptr, const void *from_ptr, size_t size,
691696
detail::memcpy(q, to_ptr, from_ptr, size).wait();
692697
}
693698

699+
} // namespace
700+
694701
/// Asynchronously copies \p size bytes from the address specified by \p
695702
/// from_ptr to the address specified by \p to_ptr. The return of the function
696703
/// does NOT guarantee the copy is completed.
@@ -723,6 +730,7 @@ memcpy_async(type_identity_t<T> *to_ptr, const type_identity_t<T> *from_ptr,
723730
static_cast<const void *>(from_ptr), count * sizeof(T));
724731
}
725732

733+
namespace {
726734
/// Synchronously copies \p count T's from the address specified by \p from_ptr
727735
/// to the address specified by \p to_ptr. The function will
728736
/// return after the copy is completed.
@@ -763,6 +771,8 @@ static inline void memcpy(void *to_ptr, size_t to_pitch, const void *from_ptr,
763771
detail::memcpy(q, to_ptr, from_ptr, to_pitch, from_pitch, x, y));
764772
}
765773

774+
} // namespace
775+
766776
/// Asynchronously copies 2D matrix specified by \p x and \p y from the address
767777
/// specified by \p from_ptr to the address specified by \p to_ptr, while \p
768778
/// \p from_pitch and \p to_pitch are the range of dim x in bytes of the matrix
@@ -785,6 +795,7 @@ static inline sycl::event memcpy_async(void *to_ptr, size_t to_pitch,
785795
return detail::combine_events(events, q);
786796
}
787797

798+
namespace {
788799
/// Synchronously copies a subset of a 3D matrix specified by \p to to another
789800
/// 3D matrix specified by \p from. The from and to position info are specified
790801
/// by \p from_pos and \p to_pos The copied matrix size is specified by \p size.
@@ -803,6 +814,7 @@ static inline void memcpy(pitched_data to, sycl::id<3> to_pos,
803814
sycl::queue q = get_default_queue()) {
804815
sycl::event::wait(detail::memcpy(q, to, to_pos, from, from_pos, size));
805816
}
817+
} // namespace
806818

807819
/// Asynchronously copies a subset of a 3D matrix specified by \p to to another
808820
/// 3D matrix specified by \p from. The from and to position info are specified
@@ -824,6 +836,7 @@ static inline sycl::event memcpy_async(pitched_data to, sycl::id<3> to_pos,
824836
return detail::combine_events(events, q);
825837
}
826838

839+
namespace {
827840
/// Synchronously sets \p pattern to the first \p count elements starting from
828841
/// \p dev_ptr. The function will return after the fill operation is completed.
829842
///
@@ -838,6 +851,7 @@ static void inline fill(void *dev_ptr, const T &pattern, size_t count,
838851
sycl::queue q = get_default_queue()) {
839852
detail::fill(q, dev_ptr, pattern, count).wait();
840853
}
854+
} // namespace
841855

842856
/// Asynchronously sets \p pattern to the first \p count elements starting from
843857
/// \p dev_ptr.
@@ -882,6 +896,7 @@ static inline void memcpy_async(const memcpy_parameter &param,
882896
}
883897
} // namespace experimental
884898

899+
namespace {
885900
/// Synchronously sets \p value to the first \p size bytes starting from \p
886901
/// dev_ptr. The function will return after the memset operation is completed.
887902
///
@@ -894,6 +909,7 @@ static void memset(void *dev_ptr, int value, size_t size,
894909
sycl::queue q = get_default_queue()) {
895910
detail::memset(q, dev_ptr, value, size).wait();
896911
}
912+
} // namespace
897913

898914
/// \brief Sets 2 bytes data \p value to the first \p size elements starting
899915
/// from \p dev_ptr in \p q synchronously.
@@ -954,6 +970,7 @@ memset_d32_async(void *dev_ptr, unsigned int value, size_t size,
954970
return detail::fill<unsigned int>(q, dev_ptr, value, size);
955971
}
956972

973+
namespace {
957974
/// \brief Sets 1 byte data \p val to the pitched 2D memory region pointed by \p
958975
/// ptr in \p q synchronously.
959976
/// \param [in] ptr Pointer to the virtual device memory.
@@ -966,6 +983,7 @@ static inline void memset(void *ptr, size_t pitch, int val, size_t x, size_t y,
966983
sycl::queue q = get_default_queue()) {
967984
sycl::event::wait(detail::memset<unsigned char>(q, ptr, pitch, val, x, y));
968985
}
986+
} // namespace
969987

970988
/// \brief Sets 2 bytes data \p val to the pitched 2D memory region pointed by
971989
/// ptr in \p q synchronously.
@@ -1044,6 +1062,7 @@ memset_d32_async(void *ptr, size_t pitch, unsigned int val, size_t x, size_t y,
10441062
return detail::combine_events(events, q);
10451063
}
10461064

1065+
namespace {
10471066
/// Sets \p value to the 3D memory region specified by \p pitch in \p q. \p size
10481067
/// specify the setted 3D memory size. The function will return after the
10491068
/// memset operation is completed.
@@ -1057,6 +1076,7 @@ static inline void memset(pitched_data pitch, int val, sycl::range<3> size,
10571076
sycl::queue q = get_default_queue()) {
10581077
sycl::event::wait(detail::memset<unsigned char>(q, pitch, val, size));
10591078
}
1079+
} // namespace
10601080

10611081
/// Sets \p value to the 3D memory region specified by \p pitch in \p q. \p size
10621082
/// specify the setted 3D memory size. The return of the function does NOT

sycl/test-e2e/Plugin/level_zero_ext_intel_cslice.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
// REQUIRES: level_zero
22
// REQUIRES: aspect-ext_intel_device_id
3-
// https://github.com/intel/llvm/issues/14826
4-
// XFAIL: arch-intel_gpu_pvc
3+
4+
// XFAIL: gpu-intel-pvc-1T
5+
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/15699
56

67
// RUN: %{build} -Wno-error=deprecated-declarations -o %t.out
78

sycl/test-e2e/Plugin/level_zero_ext_intel_queue_index.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
// REQUIRES: aspect-ext_intel_device_id
22
// REQUIRES: level_zero
33

4-
// https://github.com/intel/llvm/issues/14826
5-
// XFAIL: arch-intel_gpu_pvc
4+
// XFAIL: gpu-intel-pvc-1T
5+
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/15699
6+
67
// RUN: %{build} -o %t.out
78

89
// TODO: at this time PVC 1T systems are not correctly supporting CSLICE

sycl/test-e2e/Plugin/level_zero_sub_sub_device.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// REQUIRES: arch-intel_gpu_pvc, level_zero
22

3-
// https://github.com/intel/llvm/issues/14826
43
// XFAIL: arch-intel_gpu_pvc
4+
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/15602
55

66
// RUN: %{build} %level_zero_options -o %t.out
77

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// REQUIRES: gpu, linux
1+
// REQUIRES: gpu
22

3-
// RUN: %clangxx -Wno-error=vla-cxx-extension -fsycl -fsycl-targets=%{sycl_triple} %S/Inputs/FindPrimesSYCL.cpp %S/Inputs/main.cpp -o %t.out -lpthread
3+
// RUN: %clangxx -Wno-error=vla-cxx-extension -fsycl -fsycl-targets=%{sycl_triple} %S/Inputs/FindPrimesSYCL.cpp %S/Inputs/main.cpp -o %t.out %threads_lib
44
// RUN: %{run} %t.out

sycl/test-e2e/Scheduler/CommandCleanupThreadSafety.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
// UNSUPPORTED: windows
2-
// RUN: %{build} -o %t.out -lpthread
1+
// RUN: %{build} -o %t.out %threads_lib
32
// RUN: %{run} %t.out
43

54
#include <sycl/detail/core.hpp>

0 commit comments

Comments
 (0)