From ea115c666b19d4d847d1573fcd15a25eb71f1276 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Wed, 29 Oct 2025 08:49:14 -0700 Subject: [PATCH 01/15] [SYCL RTC] Add `sycl-rtc-experimental-redist-mode` option --- buildbot/configure.py | 11 +++ clang/include/clang/Driver/Options.td | 8 ++ clang/lib/Headers/mm_malloc.h | 9 ++- libc/include/CMakeLists.txt | 2 + libc/include/wchar.yaml | 4 +- libcxx/include/__config | 4 +- libcxx/include/__locale_dir/locale_base_api.h | 5 +- sycl-jit/jit-compiler/CMakeLists.txt | 38 +++++++++ .../lib/libc-config/entrypoints.txt | 5 ++ .../jit-compiler/lib/libc-config/headers.txt | 1 + .../sycl-rtc-standalone/__external_threading | 78 ++++++++++++++++++ .../include/sycl-rtc-standalone/linux/errno.h | 0 .../lib/rtc/DeviceCompilation.cpp | 48 ++++++++++- sycl-jit/jit-compiler/utils/generate.py | 14 +++- .../sycl_ext_oneapi_kernel_compiler.asciidoc | 51 ++++++++++++ sycl/include/sycl/builtins.hpp | 5 ++ sycl/include/sycl/detail/os_util.hpp | 5 +- .../KernelCompiler/exp_redist_mode.cpp | 79 +++++++++++++++++++ .../no_sycl_hpp_in_e2e_tests.cpp | 2 +- 19 files changed, 352 insertions(+), 17 deletions(-) create mode 100644 sycl-jit/jit-compiler/lib/libc-config/entrypoints.txt create mode 100644 sycl-jit/jit-compiler/lib/libc-config/headers.txt create mode 100644 sycl-jit/jit-compiler/lib/resource-includes/include/sycl-rtc-standalone/__external_threading create mode 100644 sycl-jit/jit-compiler/lib/resource-includes/include/sycl-rtc-standalone/linux/errno.h create mode 100644 sycl/test-e2e/KernelCompiler/exp_redist_mode.cpp diff --git a/buildbot/configure.py b/buildbot/configure.py index 3e9d45fbfe2ad..3353666c9d9a0 100644 --- a/buildbot/configure.py +++ b/buildbot/configure.py @@ -179,6 +179,11 @@ def do_configure(args, passthrough_args): install_dir = os.path.join(abs_obj_dir, "install") + llvm_enable_runtimes = "libcxx" + if platform.system() != "Windows": + llvm_enable_runtimes += ";libcxxabi;libunwind" + llvm_enable_runtimes += ";libc" + cmake_cmd = [ "cmake", "-G", @@ -195,6 +200,12 @@ def do_configure(args, passthrough_args): "-DLLVM_EXTERNAL_LIBDEVICE_SOURCE_DIR={}".format(libdevice_dir), "-DLLVM_EXTERNAL_SYCL_JIT_SOURCE_DIR={}".format(jit_dir), "-DLLVM_ENABLE_PROJECTS={}".format(llvm_enable_projects), + "-DLLVM_ENABLE_RUNTIMES={}".format(llvm_enable_runtimes), + "-DLLVM_LIBC_FULL_BUILD=ON", + "-DLLVM_LIBC_ALL_HEADERS=1", + "-DLIBC_CONFIG_PATH={}".format( + os.path.join(abs_src_dir, "sycl-jit/jit-compiler/lib/libc-config") + ), "-DSYCL_BUILD_PI_HIP_PLATFORM={}".format(sycl_build_pi_hip_platform), "-DLLVM_BUILD_TOOLS=ON", "-DLLVM_ENABLE_ZSTD={}".format(llvm_enable_zstd), diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 540d589bd270b..cd5fd452ce41b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -7561,6 +7561,14 @@ let Visibility = [SYCLRTCOnlyOption] in { : Joined<["--"], "persistent-auto-pch=">, HelpText<"Use Persistent Auto-PCH cache located at for SYCL " "RTC Compilation">; + def sycl_rtc_exp_redist_mode + : Flag<["--"], "sycl-rtc-experimental-redist-mode">, + HelpText<"Use in-memory system includes">; + def sycl_rtc_in_memory_fs_only + : Flag<["--"], "sycl-rtc-in-memory-fs-only">, + HelpText<"Disable real filesystem access for SCYl RTC compilation, " + "debug/testing only">, + Flags<[HelpHidden]>; } // let Group = sycl_rtc_only_Group } // let Visibility = [SYCLRTCOnlyOption] diff --git a/clang/lib/Headers/mm_malloc.h b/clang/lib/Headers/mm_malloc.h index d32fe59416277..86ed5f1930815 100644 --- a/clang/lib/Headers/mm_malloc.h +++ b/clang/lib/Headers/mm_malloc.h @@ -12,7 +12,8 @@ #include -#ifdef _WIN32 +#if defined(_WIN32) && \ + !(defined(__SYCL_DEVICE_ONLY__) && defined(__LLVM_LIBC__)) #include #else #ifndef __cplusplus @@ -41,7 +42,8 @@ _mm_malloc(size_t __size, size_t __align) { void *__mallocedMemory; #if defined(__MINGW32__) __mallocedMemory = __mingw_aligned_malloc(__size, __align); -#elif defined(_WIN32) +#elif defined(_WIN32) && \ + !(defined(__SYCL_DEVICE_ONLY__) && defined(__LLVM_LIBC__)) __mallocedMemory = _aligned_malloc(__size, __align); #else if (posix_memalign(&__mallocedMemory, __align, __size)) @@ -56,7 +58,8 @@ _mm_free(void *__p) { #if defined(__MINGW32__) __mingw_aligned_free(__p); -#elif defined(_WIN32) +#elif defined(_WIN32) && \ + !(defined(__SYCL_DEVICE_ONLY__) && defined(__LLVM_LIBC__)) _aligned_free(__p); #else free(__p); diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index 764d66982ea2c..3b105ef52d0ea 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -825,6 +825,8 @@ foreach(target IN LISTS all_install_header_targets) endforeach() if(LLVM_LIBC_FULL_BUILD) + add_custom_target(generate-libc-headers + DEPENDS libc-headers) add_custom_target(install-libc-headers DEPENDS libc-headers COMMAND "${CMAKE_COMMAND}" diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml index 8178091ab2202..763d53fc2958b 100644 --- a/libc/include/wchar.yaml +++ b/libc/include/wchar.yaml @@ -246,7 +246,7 @@ functions: - type: const wchar_t **__restrict - type: size_t - type: size_t - - type: mbstate_t + - type: mbstate_t *__restrict - name: wcsrtombs standards: - stdc @@ -255,7 +255,7 @@ functions: - type: char *__restrict - type: const wchar_t **__restrict - type: size_t - - type: mbstate_t + - type: mbstate_t *__restrict - name: wcrtomb standards: - stdc diff --git a/libcxx/include/__config b/libcxx/include/__config index b4c081dcdff1b..a18fbc799716c 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -274,7 +274,7 @@ _LIBCPP_HARDENING_MODE_DEBUG # define _LIBCPP_MSVCRT_LIKE // If mingw not explicitly detected, assume using MS C runtime only if // a MS compatibility version is specified. -# if defined(_MSC_VER) && !defined(__MINGW32__) +# if defined(_MSC_VER) && !defined(__MINGW32__) && !defined(_LIBCPP_NO_VCRUNTIME) # define _LIBCPP_MSVCRT // Using Microsoft's C Runtime library # endif # if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_ARM) || defined(__arm__)) @@ -911,7 +911,7 @@ typedef __char32_t char32_t; # endif # if defined(__BIONIC__) || defined(__NuttX__) || defined(__Fuchsia__) || defined(__wasi__) || \ - _LIBCPP_HAS_MUSL_LIBC || defined(__OpenBSD__) || defined(__LLVM_LIBC__) + _LIBCPP_HAS_MUSL_LIBC || defined(__OpenBSD__) || defined(__LLVM_LIBC__) || defined(__SYCL_DEVICE_ONLY__) # define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE # endif diff --git a/libcxx/include/__locale_dir/locale_base_api.h b/libcxx/include/__locale_dir/locale_base_api.h index 9f3ce02a3af20..355787fda4b45 100644 --- a/libcxx/include/__locale_dir/locale_base_api.h +++ b/libcxx/include/__locale_dir/locale_base_api.h @@ -110,8 +110,9 @@ // } #if _LIBCPP_HAS_LOCALIZATION - -# if defined(__APPLE__) +# if defined(__SYCL_DEVICE_ONLY__) +# include <__locale_dir/support/fuchsia.h> // no_locale +# elif defined(__APPLE__) # include <__locale_dir/support/apple.h> # elif defined(__FreeBSD__) # include <__locale_dir/support/freebsd.h> diff --git a/sycl-jit/jit-compiler/CMakeLists.txt b/sycl-jit/jit-compiler/CMakeLists.txt index dab40ee9eed58..1e8f8ecb76bca 100644 --- a/sycl-jit/jit-compiler/CMakeLists.txt +++ b/sycl-jit/jit-compiler/CMakeLists.txt @@ -45,6 +45,41 @@ add_custom_target(rtc-prepare-resources ${SYCL_JIT_RESOURCE_FILES} ) +set(SYCL_JIT_RUNTIME_RESOURCE_DEPS) +set(SYCL_JIT_PREPARE_RUNTIME_RESOURCE_COMMANDS) + +if ("libcxx" IN_LIST LLVM_ENABLE_RUNTIMES) + list(APPEND SYCL_JIT_RUNTIME_RESOURCE_DEPS runtimes-configure) + + if (WIN32) + set(SYCL_JIT_CXX_CONFIG_SITE ${CMAKE_CURRENT_BINARY_DIR}/rtc-resources-install/include/c++/v1/__config_site) + else() + set(SYCL_JIT_CXX_CONFIG_SITE ${CMAKE_CURRENT_BINARY_DIR}/rtc-resources-install/include/x86_64-unknown-linux-gnu/c++/v1/__config_site) + endif() + + list(APPEND SYCL_JIT_PREPARE_RUNTIME_RESOURCE_COMMANDS + COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}/runtimes/runtimes-bins --target generate-cxx-headers + COMMAND ${CMAKE_COMMAND} --install ${CMAKE_BINARY_DIR}/runtimes/runtimes-bins --prefix ${CMAKE_CURRENT_BINARY_DIR}/rtc-resources-install --component cxx-headers + COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/rtc-resources-install/include/sycl-rtc-standalone + COMMAND sed 's/_LIBCPP_HAS_THREAD_API_EXTERNAL 0/_LIBCPP_HAS_THREAD_API_EXTERNAL 1/' ${SYCL_JIT_CXX_CONFIG_SITE} > ${CMAKE_CURRENT_BINARY_DIR}/rtc-resources-install/include/sycl-rtc-standalone/__config_site + ) +endif() + +if ("libc" IN_LIST LLVM_ENABLE_RUNTIMES) + list(APPEND SYCL_JIT_RUNTIME_RESOURCE_DEPS runtimes-configure) + # list(APPEND SYCL_JIT_RUNTIME_RESOURCE_DEPS libc) + + list(APPEND SYCL_JIT_PREPARE_RUNTIME_RESOURCE_COMMANDS + COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}/runtimes/runtimes-bins --target generate-libc-headers + COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_BINARY_DIR}/runtimes/runtimes-bins/libc/include ${SYCL_JIT_RESOURCE_INSTALL_DIR}/include/libc + ) +endif() + +add_custom_target(rtc-prepare-runtime-resources + DEPENDS ${SYCL_JIT_RUNTIME_RESOURCE_DEPS} + ${SYCL_JIT_PREPARE_RUNTIME_RESOURCE_COMMANDS} +) + add_custom_command( OUTPUT ${SYCL_JIT_RESOURCE_CPP} COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/utils/generate.py --toolchain-dir ${SYCL_JIT_RESOURCE_INSTALL_DIR} --output ${SYCL_JIT_RESOURCE_CPP} --prefix ${SYCL_JIT_VIRTUAL_TOOLCHAIN_ROOT} @@ -52,6 +87,8 @@ add_custom_command( rtc-prepare-resources ${SYCL_JIT_RESOURCE_DEPS} ${SYCL_JIT_RESOURCE_FILES} + rtc-prepare-runtime-resources + ${SYCL_JIT_RUNTIME_RESOURCE_DEPS} ${CMAKE_CURRENT_SOURCE_DIR}/utils/generate.py ) @@ -94,6 +131,7 @@ add_custom_command( ${SYCL_JIT_RESOURCE_CPP} ${SYCL_JIT_RESOURCE_DEPS} ${SYCL_JIT_RESOURCE_FILES} + ${SYCL_JIT_RUNTIME_RESOURCE_DEPS} ${CMAKE_CURRENT_SOURCE_DIR}/include/Resource.h ) diff --git a/sycl-jit/jit-compiler/lib/libc-config/entrypoints.txt b/sycl-jit/jit-compiler/lib/libc-config/entrypoints.txt new file mode 100644 index 0000000000000..05e1f4aa2df8c --- /dev/null +++ b/sycl-jit/jit-compiler/lib/libc-config/entrypoints.txt @@ -0,0 +1,5 @@ +if(EXISTS "${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/${LIBC_TARGET_ARCHITECTURE}/entrypoints.txt") + include("${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/${LIBC_TARGET_ARCHITECTURE}/entrypoints.txt") +else() + include("${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/entrypoints.txt") +endif() diff --git a/sycl-jit/jit-compiler/lib/libc-config/headers.txt b/sycl-jit/jit-compiler/lib/libc-config/headers.txt new file mode 100644 index 0000000000000..72ae7d85517b9 --- /dev/null +++ b/sycl-jit/jit-compiler/lib/libc-config/headers.txt @@ -0,0 +1 @@ +include("${LIBC_SOURCE_DIR}/config/linux/x86_64/headers.txt") diff --git a/sycl-jit/jit-compiler/lib/resource-includes/include/sycl-rtc-standalone/__external_threading b/sycl-jit/jit-compiler/lib/resource-includes/include/sycl-rtc-standalone/__external_threading new file mode 100644 index 0000000000000..02b522468f138 --- /dev/null +++ b/sycl-jit/jit-compiler/lib/resource-includes/include/sycl-rtc-standalone/__external_threading @@ -0,0 +1,78 @@ +_LIBCPP_BEGIN_NAMESPACE_STD + +using __libcpp_timespec_t = int; + +// +// Mutex +// +using __libcpp_mutex_t = int; +#define _LIBCPP_MUTEX_INITIALIZER 0 + +using __libcpp_recursive_mutex_t = int; + +int __libcpp_recursive_mutex_init(__libcpp_recursive_mutex_t*); +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_recursive_mutex_lock(__libcpp_recursive_mutex_t*); +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS bool __libcpp_recursive_mutex_trylock(__libcpp_recursive_mutex_t*); +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_recursive_mutex_unlock(__libcpp_recursive_mutex_t*); +int __libcpp_recursive_mutex_destroy(__libcpp_recursive_mutex_t*); + +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_lock(__libcpp_mutex_t*); +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS bool __libcpp_mutex_trylock(__libcpp_mutex_t*); +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_mutex_unlock(__libcpp_mutex_t*); +int __libcpp_mutex_destroy(__libcpp_mutex_t*); + +// +// Condition Variable +// +using __libcpp_condvar_t = int; +#define _LIBCPP_CONDVAR_INITIALIZER 0 + +int __libcpp_condvar_signal(__libcpp_condvar_t*); +int __libcpp_condvar_broadcast(__libcpp_condvar_t*); +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_condvar_wait(__libcpp_condvar_t*, __libcpp_mutex_t*); +_LIBCPP_NO_THREAD_SAFETY_ANALYSIS +int __libcpp_condvar_timedwait(__libcpp_condvar_t*, __libcpp_mutex_t*, __libcpp_timespec_t*); +int __libcpp_condvar_destroy(__libcpp_condvar_t*); + +// +// Execute once +// +using __libcpp_exec_once_flag = int; +#define _LIBCPP_EXEC_ONCE_INITIALIZER 0 + +int __libcpp_execute_once(__libcpp_exec_once_flag*, void (*__init_routine)()); + +// +// Thread id +// +using __libcpp_thread_id = int; + +bool __libcpp_thread_id_equal(__libcpp_thread_id, __libcpp_thread_id); +bool __libcpp_thread_id_less(__libcpp_thread_id, __libcpp_thread_id); + +// +// Thread +// +#define _LIBCPP_NULL_THREAD 0 +using __libcpp_thread_t = int; + +bool __libcpp_thread_isnull(const __libcpp_thread_t*); +int __libcpp_thread_create(__libcpp_thread_t*, void* (*__func)(void*), void* __arg); +__libcpp_thread_id __libcpp_thread_get_current_id(); +__libcpp_thread_id __libcpp_thread_get_id(const __libcpp_thread_t*); +int __libcpp_thread_join(__libcpp_thread_t*); +int __libcpp_thread_detach(__libcpp_thread_t*); +void __libcpp_thread_yield(); +void __libcpp_thread_sleep_for(const chrono::nanoseconds&); + +// +// Thread local storage +// +#define _LIBCPP_TLS_DESTRUCTOR_CC 0 +using __libcpp_tls_key = int; + +int __libcpp_tls_create(__libcpp_tls_key*, void (*__at_exit)(void*)); +void* __libcpp_tls_get(__libcpp_tls_key); +int __libcpp_tls_set(__libcpp_tls_key, void*); + +_LIBCPP_END_NAMESPACE_STD diff --git a/sycl-jit/jit-compiler/lib/resource-includes/include/sycl-rtc-standalone/linux/errno.h b/sycl-jit/jit-compiler/lib/resource-includes/include/sycl-rtc-standalone/linux/errno.h new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index e0dff8aa9ca7e..6cee9d07663c3 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -262,6 +262,42 @@ class SYCLToolchain { DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_offload_arch_EQ), CPU); } + if (UserArgList.hasArg(OPT_sycl_rtc_exp_redist_mode)) { + DAL.AddFlagArg(nullptr, OptTable.getOption(OPT_nostdlibinc)); + auto AddInc = [&](auto RelPath) { + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_isystem), + (getPrefix() + RelPath).str()); + }; + AddInc("include/sycl/stl_wrappers"); + // Contains modified `__config_site` for libc++, need to come earlier in + // the search path: + AddInc("include/sycl-rtc-standalone/"); +#if !defined(_WIN32) + // AFAIK, it only contains original `__config_site` that we don't use (see + // above), but it seems safer to add this path anyway, in case any extra + // files are added. On Windows `LIBCXX_GENERATED_INCLUDE_TARGET_DIR` is + // off and thus we don't need it. + AddInc("include/x86_64-unknown-linux-gnu/c++/v1"); +#endif + AddInc("include/c++/v1"); + AddInc("include/libc"); + AddInc("include/"); + AddInc("include/lib/clang/22/include/"); + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_D), + "_LIBCPP_REMOVE_TRANSITIVE_INCLUDES"); +#if defined(_WIN32) + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_D), + "_LIBCPP_WCHAR_H_HAS_CONST_OVERLOADS"); + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_D), + "_LIBCPP_NO_VCRUNTIME"); + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_U), "__ELF__"); + +#endif + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_include), "stdio.h"); + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_include), "wchar.h"); + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_include), "time.h"); + } + ArgStringList ASL; for (Arg *A : DAL) A->render(DAL, ASL); @@ -543,9 +579,15 @@ class SYCLToolchain { std::vector CommandLine = createCommandLine(UserArgList, Format, SourceFilePath); - auto FS = llvm::makeIntrusiveRefCnt( - llvm::vfs::getRealFileSystem()); - FS->pushOverlay(getToolchainFS()); + llvm::IntrusiveRefCntPtr FS; + if (UserArgList.hasArg(OPT_sycl_rtc_in_memory_fs_only)) { + FS = llvm::makeIntrusiveRefCnt( + getToolchainFS()); + } else { + FS = llvm::makeIntrusiveRefCnt( + llvm::vfs::getRealFileSystem()); + FS->pushOverlay(getToolchainFS()); + } if (FSOverlay) FS->pushOverlay(std::move(FSOverlay)); diff --git a/sycl-jit/jit-compiler/utils/generate.py b/sycl-jit/jit-compiler/utils/generate.py index 449723b8fa268..941d95beb440f 100644 --- a/sycl-jit/jit-compiler/utils/generate.py +++ b/sycl-jit/jit-compiler/utils/generate.py @@ -32,11 +32,11 @@ def main(): const resource_file ToolchainFiles[] = {""" ) - def process_file(file_path): + def process_file(file_path, relative_to): out.write( f""" {{ - {{"{args.prefix}{os.path.relpath(file_path, toolchain_dir).replace(os.sep, "/")}"}} , + {{"{args.prefix}{os.path.relpath(file_path, relative_to).replace(os.sep, "/")}"}} , []() {{ static const char data[] = {{ #embed "{file_path}" if_empty(0) @@ -50,9 +50,17 @@ def process_dir(dir): for root, _, files in os.walk(dir): for file in files: file_path = os.path.join(root, file) - process_file(file_path) + process_file(file_path, dir) process_dir(args.toolchain_dir) + process_dir( + os.path.realpath( + os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "../lib/resource-includes/", + ) + ) + ) out.write( f""" diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc index 261fc96f0676f..d251c511e40c8 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc @@ -1351,6 +1351,57 @@ Some notes about the current behavior: using the conflicting hash would proceed without pre-compiled preamble support as if this option wasn't enabled. +==== `--sycl-rtc-experimental-redist-mode` + +Highly experimental option to facilitate distribution of applications that use +SYCL RTC. + +While SYCL-specific header/bitcode files are embedded/distributed inside +`sycl-jit` DSO we still have a dependency on the C/C++ compiler toolchain on the +end-user system (header files only actually, but that's not too important). That +requirement is troublesome for many of the SYCL RTC customers and this option +tries to address that limitation. + +The main problem we face is that default host toolchain (MSVC/GCC on Win/Lin, +respectively) has license requirements that prohibit distribution of necessary +headers as part of SYCL redistributables. Instead, we embed LLVM's libc++ and +libc headers into the `sycl-jit` DSO and provide this option to use those +headers instead. This obviously comes with many limitations/obstacles. Using +this options results in roughly following options being passed implicitly: + +* `--nostdlibinc`, effectively excluding CUDA/HIP SDKs, unless the application + deals with that somehow. The reason we need to do that is because having + different C library implementation in includes search path doesn't work. + +* Setup virtual file system include paths to enable the use of LLVM's + `libc++`/`libc` headers embedded into `sycl-jit` DSO. + +* Define/undefine some macros found by trial-and-error to make that fragile + configuration work, at least for some examples. + +* Add implicit `-include` directive to several C headers. Again, found by + trial-and-error to make things work for some of the examples we tried. + +This also comes with lots of limitations: + +* First, huge potential for ABI mismatch between host (SYCL app) ABI and the + device code (JIT-compiled via SYCL RTC). As such, it is recommended to limit + types passing the host/device boundary to: + + - Fixed-width integer types + - `float`/`double` + - Aggregate types of the above. + - Anything else needs **very thorough** testing at the SYCL application side. + + Using SYCL/STL types in the device code only, without passing host-device + boundary is expected to work. + +* CUDA/HIP aren't expected to work out-of-the-box, see above (`--nostdlibinc`). + +* Option is **highly** experimentall, **no** support is guaranteed and is + subject to change at **any** time, including the possibility of a complete + removal. + === Known issues and limitations when the language is `sycl` ==== Changing the compiler action or output diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index 0aa48c6992525..a8b6597b3be44 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -27,8 +27,13 @@ extern __DPCPP_SYCL_EXTERNAL_LIBC char *strncpy(char *dest, const char *src, extern __DPCPP_SYCL_EXTERNAL_LIBC int strcmp(const char *s1, const char *s2); extern __DPCPP_SYCL_EXTERNAL_LIBC int strncmp(const char *s1, const char *s2, size_t n); +#ifdef __LLVM_LIBC__ +extern __DPCPP_SYCL_EXTERNAL_LIBC int rand() noexcept; +extern __DPCPP_SYCL_EXTERNAL_LIBC void srand(unsigned int seed) noexcept; +#else extern __DPCPP_SYCL_EXTERNAL_LIBC int rand(); extern __DPCPP_SYCL_EXTERNAL_LIBC void srand(unsigned int seed); +#endif extern __DPCPP_SYCL_EXTERNAL long long int __imf_llmax(long long int x, long long int y); extern __DPCPP_SYCL_EXTERNAL long long int __imf_llmin(long long int x, diff --git a/sycl/include/sycl/detail/os_util.hpp b/sycl/include/sycl/detail/os_util.hpp index 6dc084573cc5f..4d20dec82837d 100644 --- a/sycl/include/sycl/detail/os_util.hpp +++ b/sycl/include/sycl/detail/os_util.hpp @@ -82,7 +82,10 @@ class __SYCL_EXPORT OSUtil { /// Checks if specified path is present. static bool isPathPresent(const std::string &Path) { -#ifdef __SYCL_RT_OS_WINDOWS +#ifdef __SYCL_DEVICE_ONLY__ + (void)Path; + return false; +#elif defined(__SYCL_RT_OS_WINDOWS) struct _stat Stat; return !_stat(Path.c_str(), &Stat); #else diff --git a/sycl/test-e2e/KernelCompiler/exp_redist_mode.cpp b/sycl/test-e2e/KernelCompiler/exp_redist_mode.cpp new file mode 100644 index 0000000000000..f162655507445 --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/exp_redist_mode.cpp @@ -0,0 +1,79 @@ +// RUN: %{build} -o %t.out + +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out + +// Make sure that debug/test-only option `--sycl-rtc-in-memory-fs-only` works +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} not %t.out --sycl-rtc-in-memory-fs-only | FileCheck %s --check-prefix CHECK-ERROR +// CHECK-ERROR-LABEL: Device compilation failed +// CHECK-ERROR-NEXT: Detailed information: +// CHECK-ERROR: In file included from rtc_0.cpp:2: +// CHECK-ERROR-NEXT: In file included from {{.*}}/sycl-jit-toolchain//bin/../include/sycl/sycl.hpp:38: +// CHECK-ERROR-NEXT: In file included from {{.*}}/sycl-jit-toolchain//bin/../include/sycl/detail/core.hpp:21: +// CHECK-ERROR-NEXT: In file included from {{.*}}/sycl-jit-toolchain//bin/../include/sycl/accessor.hpp:11: +// CHECK-ERROR-NEXT: {{.*}}/sycl-jit-toolchain//bin/../include/sycl/access/access.hpp:14:10: fatal error: 'type_traits' file not found +// CHECK-ERROR-NEXT: 14 | #include +// CHECK-ERROR-NEXT: | ^~~~~~~~~~~~~ + +// Now actually test the `--sycl-rtc-experimental-redist-mode` option: +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out --sycl-rtc-experimental-redist-mode --sycl-rtc-in-memory-fs-only +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out --sycl-rtc-experimental-redist-mode + +// XFAIL: target-native_cpu +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/20142 + +// CUDA/HIP have SDK dependencies but exclude system includes so those aren't +// satisfied. +// REQUIRES: target-spir + +#include +#include +#include + +namespace syclexp = sycl::ext::oneapi::experimental; + +int main(int argc, char *argv[]) { + sycl::queue q; + std::string source = R"""( + #include + namespace syclext = sycl::ext::oneapi; + namespace syclexp = sycl::ext::oneapi::experimental; + + extern "C" + SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel)) + void foo(int *p) { + *p = 42; + } +)"""; + std::vector opts; + + // Without this we see stack overflows on Win, but for some reason only in + // `--sycl-rtc-in-memory-fs-only` mode when it should really be failing + // earlier. + opts.push_back("-fconstexpr-depth=128"); + + for (int i = 1; i < argc; ++i) + opts.emplace_back(argv[i]); + try { + + auto kb_src = syclexp::create_kernel_bundle_from_source( + q.get_context(), syclexp::source_language::sycl, source); + auto kb_exe = syclexp::build( + kb_src, syclexp::properties{syclexp::build_options{opts}}); + sycl::kernel krn = kb_exe.ext_oneapi_get_kernel("foo"); + auto *p = sycl::malloc_shared(1, q); + q.submit([&](sycl::handler &cgh) { + cgh.set_args(p); + cgh.single_task(krn); + }).wait(); + std::cout << "Result: " << *p << std::endl; + assert(*p == 42); + sycl::free(p, q); + } catch (const sycl::exception &e) { + // Make `CHECK` lines more portable between Lin/Win: + std::string s = e.what(); + std::replace(s.begin(), s.end(), '\\', '/'); + + std::cout << s; + return 1; + } +} diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index 7f3757fc70624..6d41cff6dc4cd 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 29 +// CHECK-NUM-MATCHES: 30 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see From d7a20b346da0579f3bce9d7aef0b516649a915f8 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Tue, 4 Nov 2025 13:51:47 -0800 Subject: [PATCH 02/15] Address minor CR comments --- buildbot/configure.py | 1 + sycl-jit/jit-compiler/CMakeLists.txt | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/buildbot/configure.py b/buildbot/configure.py index 3353666c9d9a0..4aae72c348f69 100644 --- a/buildbot/configure.py +++ b/buildbot/configure.py @@ -180,6 +180,7 @@ def do_configure(args, passthrough_args): install_dir = os.path.join(abs_obj_dir, "install") llvm_enable_runtimes = "libcxx" + # Matches `libcxx`'s requirements/platform ABI: if platform.system() != "Windows": llvm_enable_runtimes += ";libcxxabi;libunwind" llvm_enable_runtimes += ";libc" diff --git a/sycl-jit/jit-compiler/CMakeLists.txt b/sycl-jit/jit-compiler/CMakeLists.txt index 1e8f8ecb76bca..75835d185bce7 100644 --- a/sycl-jit/jit-compiler/CMakeLists.txt +++ b/sycl-jit/jit-compiler/CMakeLists.txt @@ -21,7 +21,7 @@ endif() set(SYCL_JIT_RESOURCE_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/rtc-resources-install) -set(SYCL_JIT_PREPARE_RESOURCE_COMMANDS) +set(SYCL_JIT_PREPARE_RESOURCE_COMMANDS "") foreach(component IN LISTS SYCL_JIT_RESOURCE_INSTALL_COMPONENTS) list(APPEND SYCL_JIT_PREPARE_RESOURCE_COMMANDS COMMAND ${CMAKE_COMMAND} --install ${CMAKE_BINARY_DIR} --prefix ${SYCL_JIT_RESOURCE_INSTALL_DIR} --component "${component}" @@ -45,8 +45,8 @@ add_custom_target(rtc-prepare-resources ${SYCL_JIT_RESOURCE_FILES} ) -set(SYCL_JIT_RUNTIME_RESOURCE_DEPS) -set(SYCL_JIT_PREPARE_RUNTIME_RESOURCE_COMMANDS) +set(SYCL_JIT_RUNTIME_RESOURCE_DEPS "") +set(SYCL_JIT_PREPARE_RUNTIME_RESOURCE_COMMANDS "") if ("libcxx" IN_LIST LLVM_ENABLE_RUNTIMES) list(APPEND SYCL_JIT_RUNTIME_RESOURCE_DEPS runtimes-configure) @@ -60,6 +60,9 @@ if ("libcxx" IN_LIST LLVM_ENABLE_RUNTIMES) list(APPEND SYCL_JIT_PREPARE_RUNTIME_RESOURCE_COMMANDS COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}/runtimes/runtimes-bins --target generate-cxx-headers COMMAND ${CMAKE_COMMAND} --install ${CMAKE_BINARY_DIR}/runtimes/runtimes-bins --prefix ${CMAKE_CURRENT_BINARY_DIR}/rtc-resources-install --component cxx-headers + # `` functionality isn't really supported on the device, just make + # it pass compilation. The easiest way to do that is to configure `libc++` + # to use "external threading API". COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/rtc-resources-install/include/sycl-rtc-standalone COMMAND sed 's/_LIBCPP_HAS_THREAD_API_EXTERNAL 0/_LIBCPP_HAS_THREAD_API_EXTERNAL 1/' ${SYCL_JIT_CXX_CONFIG_SITE} > ${CMAKE_CURRENT_BINARY_DIR}/rtc-resources-install/include/sycl-rtc-standalone/__config_site ) @@ -67,7 +70,6 @@ endif() if ("libc" IN_LIST LLVM_ENABLE_RUNTIMES) list(APPEND SYCL_JIT_RUNTIME_RESOURCE_DEPS runtimes-configure) - # list(APPEND SYCL_JIT_RUNTIME_RESOURCE_DEPS libc) list(APPEND SYCL_JIT_PREPARE_RUNTIME_RESOURCE_COMMANDS COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}/runtimes/runtimes-bins --target generate-libc-headers From 02fca663d4477ee9ffb6b004ba810572f7ac15ce Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Tue, 4 Nov 2025 13:54:59 -0800 Subject: [PATCH 03/15] Fix typo --- clang/include/clang/Driver/Options.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index cd5fd452ce41b..d7796fee28861 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -7566,7 +7566,7 @@ let Visibility = [SYCLRTCOnlyOption] in { HelpText<"Use in-memory system includes">; def sycl_rtc_in_memory_fs_only : Flag<["--"], "sycl-rtc-in-memory-fs-only">, - HelpText<"Disable real filesystem access for SCYl RTC compilation, " + HelpText<"Disable real filesystem access for SYCL RTC compilation, " "debug/testing only">, Flags<[HelpHidden]>; } // let Group = sycl_rtc_only_Group From dab7317edf727727ebb1182ceed9927f712de7d2 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Wed, 5 Nov 2025 07:44:13 -0800 Subject: [PATCH 04/15] s/debug/debugging/ --- clang/include/clang/Driver/Options.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index d7796fee28861..467fba54657cb 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -7567,7 +7567,7 @@ let Visibility = [SYCLRTCOnlyOption] in { def sycl_rtc_in_memory_fs_only : Flag<["--"], "sycl-rtc-in-memory-fs-only">, HelpText<"Disable real filesystem access for SYCL RTC compilation, " - "debug/testing only">, + "debugging/testing only">, Flags<[HelpHidden]>; } // let Group = sycl_rtc_only_Group } // let Visibility = [SYCLRTCOnlyOption] From ed22872413564a413d42002c74d5e27718cba235 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 6 Nov 2025 13:37:43 -0800 Subject: [PATCH 05/15] Extra comment --- .../lib/rtc/DeviceCompilation.cpp | 43 ++++++++++++++++++- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index 11bfcf7b0435d..e30731222de4f 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -261,15 +261,45 @@ class SYCLToolchain { DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_offload_arch_EQ), CPU); } + // Reasons why this is done here and not in the clang driver: + // + // 1) Unlike libcxx, upstream libc is installed directly into + // `/include` or `//include` together with + // other compiler headers meaning we can't magically turn it on or off + // (unless we introduce a dedicated VFS overlay just for libc). + // 2) Having multiple C libraries in include search paths is unsupported, + // so in order to use LLVM libc we have to remove default system + // includes. That in turn excludes (at the very least) CUDA/HIP SDKs, so + // we want that behavior to be optional. That, in turn, means that + // because of (1) we have to have non-standard libc install location (we + // chose `/include/libc`) and that has no support in the + // clang driver, so we have to add libc headers to system include + // directories manually. + // 3) However, libcxx her search path must combe *before* libc includes, + // but `-isystem` and similar options prepend the list of search paths. + // As such, we can't just have the driver do part of the job and then + // adjust the behavior via extra options, so we need to maintain + // everything on our own. + // 4) We could do everything via custom code in the clang driver, but the + // location of `include/libc` is controlled in this `sycl-jit` project + // and it was slightly more convenient for me to implement it here, at + // least for the downstream implementation. + // 5) Once we upstream SYCL support there will be a use-case to move libc + // headers installation to a separate directory (similar to libcxx), at + // that time we might have support for this in the clang driver + // directly and would be able to avoid doing that here. if (UserArgList.hasArg(OPT_sycl_rtc_exp_redist_mode)) { DAL.AddFlagArg(nullptr, OptTable.getOption(OPT_nostdlibinc)); auto AddInc = [&](auto RelPath) { DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_isystem), (getPrefix() + RelPath).str()); }; + // Must come before C/C++ headers as we're intercepting them in those + // wrappers: AddInc("include/sycl/stl_wrappers"); // Contains modified `__config_site` for libc++, need to come earlier in - // the search path: + // the search path. Other headers there don't seem to require any specific + // priority/search path order. AddInc("include/sycl-rtc-standalone/"); #if !defined(_WIN32) // AFAIK, it only contains original `__config_site` that we don't use (see @@ -278,10 +308,19 @@ class SYCLToolchain { // off and thus we don't need it. AddInc("include/x86_64-unknown-linux-gnu/c++/v1"); #endif + // libcxx headers, must come before libc headers: AddInc("include/c++/v1"); + // libc headers, our (SYCL RTC) custom non-standard location: AddInc("include/libc"); + // SYCL/SYCL-related headers actually, because `` and not + // just ``. Can be argued that actual installation layout should + // actually be `include/sycl/ur_api.h` and `include/sycl/sycl/sycl.hpp` + // but that's outside the SYCL RTC scope. I think any relative order in + // relation to libcxx/libc is allowed. AddInc("include/"); - AddInc("include/lib/clang/22/include/"); + // NOTE: `include/lib/clang//include/` is added automatically (we use + // `--nostdlibinc` and not `--nostdinc`). + DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_D), "_LIBCPP_REMOVE_TRANSITIVE_INCLUDES"); #if defined(_WIN32) From 4b85687dd436a71ef952ca89669c96b51c51a74d Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 6 Nov 2025 14:28:59 -0800 Subject: [PATCH 06/15] [clang][Driver][SYCL] Disable SYCL header search paths under `-nostd[lib]inc` --- clang/lib/Driver/ToolChains/Clang.cpp | 39 ++++++++++++++------------- clang/lib/Driver/ToolChains/SYCL.cpp | 4 ++- clang/test/Driver/sycl-nostdinc.cpp | 9 +++++++ 3 files changed, 33 insertions(+), 19 deletions(-) create mode 100644 clang/test/Driver/sycl-nostdinc.cpp diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 3e31098b20bb5..03a274bd85b68 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5045,24 +5045,27 @@ void Clang::ConstructHostCompilerJob(Compilation &C, const JobAction &JA, if (IsMSVCHostCompiler) HostCompileArgs.push_back("/external:W0"); - // Add default header search directories. - SmallString<128> BaseDir(C.getDriver().Dir); - llvm::sys::path::append(BaseDir, "..", "include"); - SmallString<128> SYCLDir(BaseDir); - llvm::sys::path::append(SYCLDir, "sycl"); - // This is used to provide our wrappers around STL headers that provide - // additional functions/template specializations when the user includes those - // STL headers in their programs (e.g., ). - SmallString<128> STLWrappersDir(SYCLDir); - llvm::sys::path::append(STLWrappersDir, "stl_wrappers"); - // Add the SYCL specific header directories as system directories for non - // MSVC compilers. - HostCompileArgs.push_back(IsMSVCHostCompiler ? "/external:I" : "-isystem"); - HostCompileArgs.push_back(TCArgs.MakeArgString(SYCLDir)); - HostCompileArgs.push_back(IsMSVCHostCompiler ? "/external:I" : "-isystem"); - HostCompileArgs.push_back(TCArgs.MakeArgString(STLWrappersDir)); - HostCompileArgs.push_back(IsMSVCHostCompiler ? "/external:I" : "-isystem"); - HostCompileArgs.push_back(TCArgs.MakeArgString(BaseDir)); + namespace options = clang::driver::options; + if (!TCArgs.hasArg(options::OPT_nostdlibinc, options::OPT_nostdinc)) { + // Add default header search directories. + SmallString<128> BaseDir(C.getDriver().Dir); + llvm::sys::path::append(BaseDir, "..", "include"); + SmallString<128> SYCLDir(BaseDir); + llvm::sys::path::append(SYCLDir, "sycl"); + // This is used to provide our wrappers around STL headers that provide + // additional functions/template specializations when the user includes + // those STL headers in their programs (e.g., ). + SmallString<128> STLWrappersDir(SYCLDir); + llvm::sys::path::append(STLWrappersDir, "stl_wrappers"); + // Add the SYCL specific header directories as system directories for non + // MSVC compilers. + HostCompileArgs.push_back(IsMSVCHostCompiler ? "/external:I" : "-isystem"); + HostCompileArgs.push_back(TCArgs.MakeArgString(SYCLDir)); + HostCompileArgs.push_back(IsMSVCHostCompiler ? "/external:I" : "-isystem"); + HostCompileArgs.push_back(TCArgs.MakeArgString(STLWrappersDir)); + HostCompileArgs.push_back(IsMSVCHostCompiler ? "/external:I" : "-isystem"); + HostCompileArgs.push_back(TCArgs.MakeArgString(BaseDir)); + } if (!OutputAdded) { // Add output file to the command line. This is assumed to be prefaced diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index f26a1374217d3..50cc57e93d9e4 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -123,8 +123,10 @@ void SYCLInstallationDetector::getSYCLDeviceLibPath( void SYCLInstallationDetector::addSYCLIncludeArgs( const ArgList &DriverArgs, ArgStringList &CC1Args) const { - if (DriverArgs.hasArg(clang::driver::options::OPT_nobuiltininc)) + namespace options = clang::driver::options; + if (DriverArgs.hasArg(options::OPT_nostdlibinc, options::OPT_nostdinc)) { return; + } // Add the SYCL header search locations in the specified order. // ../include/sycl/stl_wrappers // ../include diff --git a/clang/test/Driver/sycl-nostdinc.cpp b/clang/test/Driver/sycl-nostdinc.cpp new file mode 100644 index 0000000000000..2754f0b5ca2b2 --- /dev/null +++ b/clang/test/Driver/sycl-nostdinc.cpp @@ -0,0 +1,9 @@ +// RUN: %clangxx -fsycl -fsycl-device-only -nostdlibinc -fsyntax-only %s +// RUN: %clangxx -fsycl -fsycl-device-only -nostdinc -fsyntax-only %s + +// RUN: %clangxx -fsycl -nostdlibinc -fsyntax-only %s +// RUN: %clangxx -fsycl -nostdinc -fsyntax-only %s + +#if __has_include() +#error "expected to *not* be able to find SYCL headers" +#endif From 649561ac26e5b884962c4cb1a78ef2d7972ac9dd Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 6 Nov 2025 15:21:29 -0800 Subject: [PATCH 07/15] [WIP] Flip the default for libcxx/libc usage --- clang/include/clang/Driver/Options.td | 6 +- .../lib/rtc/DeviceCompilation.cpp | 25 ++++-- .../sycl_ext_oneapi_kernel_compiler.asciidoc | 76 +++++++------------ ...exp_redist_mode.cpp => in_memory_only.cpp} | 13 +--- 4 files changed, 54 insertions(+), 66 deletions(-) rename sycl/test-e2e/KernelCompiler/{exp_redist_mode.cpp => in_memory_only.cpp} (78%) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index ddb2cf69f701c..c6e6adb033cac 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -7588,9 +7588,9 @@ let Visibility = [SYCLRTCOnlyOption] in { : Joined<["--"], "persistent-auto-pch=">, HelpText<"Use Persistent Auto-PCH cache located at for SYCL " "RTC Compilation">; - def sycl_rtc_exp_redist_mode - : Flag<["--"], "sycl-rtc-experimental-redist-mode">, - HelpText<"Use in-memory system includes">; + def sycl_rtc_use_system_includes + : Flag<["--"], "sycl-rtc-use-system-includes">, + HelpText<"Use system includes instead of in-memory libcxx/libc">; def sycl_rtc_in_memory_fs_only : Flag<["--"], "sycl-rtc-in-memory-fs-only">, HelpText<"Disable real filesystem access for SYCL RTC compilation, " diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index e30731222de4f..e14d38d862ecc 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -261,7 +261,7 @@ class SYCLToolchain { DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_offload_arch_EQ), CPU); } - // Reasons why this is done here and not in the clang driver: + // Reasons why the following is done here and not in the clang driver: // // 1) Unlike libcxx, upstream libc is installed directly into // `/include` or `//include` together with @@ -288,7 +288,22 @@ class SYCLToolchain { // headers installation to a separate directory (similar to libcxx), at // that time we might have support for this in the clang driver // directly and would be able to avoid doing that here. - if (UserArgList.hasArg(OPT_sycl_rtc_exp_redist_mode)) { + + // Prefer using in-memory as that's friendlier for the end users of SYCL + // applications as that mode doesn't require any C/C++ toolchain to be + // installed on the system. + bool UseInMemoryCxxCHeaders = true; + + // Unless explicitly told not to: + if (UserArgList.hasArg(OPT_sycl_rtc_use_system_includes)) + UseInMemoryCxxCHeaders = false; + + // CUDA/HIP need SDK headers that we can't distribute ourselves, so we have + // to use system includes as well: + if (Format == BinaryFormat::PTX || Format == BinaryFormat::AMDGCN) + UseInMemoryCxxCHeaders = false; + + if (UseInMemoryCxxCHeaders) { DAL.AddFlagArg(nullptr, OptTable.getOption(OPT_nostdlibinc)); auto AddInc = [&](auto RelPath) { DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_isystem), @@ -310,7 +325,7 @@ class SYCLToolchain { #endif // libcxx headers, must come before libc headers: AddInc("include/c++/v1"); - // libc headers, our (SYCL RTC) custom non-standard location: + // libc headers, our (SYCL RTC) custom non-standard location: AddInc("include/libc"); // SYCL/SYCL-related headers actually, because `` and not // just ``. Can be argued that actual installation layout should @@ -318,8 +333,8 @@ class SYCLToolchain { // but that's outside the SYCL RTC scope. I think any relative order in // relation to libcxx/libc is allowed. AddInc("include/"); - // NOTE: `include/lib/clang//include/` is added automatically (we use - // `--nostdlibinc` and not `--nostdinc`). + // NOTE: `include/lib/clang//include/` is added automatically (we + // use `--nostdlibinc` and not `--nostdinc`). DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_D), "_LIBCPP_REMOVE_TRANSITIVE_INCLUDES"); diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc index d251c511e40c8..5dd828e088859 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc @@ -11,6 +11,7 @@ :lang: en :dpcpp: pass:[DPC++] :cpp: pass:[C++] +:libcxx: pass:[libc++] :endnote: —{nbsp}end{nbsp}note // Set the default source code type in this document to C++, @@ -1046,6 +1047,28 @@ sycl::kernel k_float = kb.ext_oneapi_get_kernel("bartmpl"); sycl::kernel k_int = kb.ext_oneapi_get_kernel("bartmpl"); ---- +=== Restrictions on the source code when language is `sycl` + +Currently, SYCL applications are linked with MSVC/GNU C/{cpp} runtimes on Win/Lin +respectively. Unfortunately, we cannot distribute those headers together with +SYCL applications using kernel compiler due to license restrictions. Instead, we +distribute LLVM's {libcxx}/libc headers as part of the kernel compiler and +default to using them for SPIR-V based targets. That results in a restriction +not to pass any data that transitively uses any types defined in C/{cpp} headers +(with the exception of `[u]intN_t` type aliases) as those might be +ABI-incompatible between headers used to compiler device code and those used to +compile the host part of the SYCL application. + +Technical implementation of that disables system include paths entirely (as +having multiple C library implementation in include search paths would break +things) and uses virtual file system containing in-memory copy of LLVM's +{libcxx}/libc headers. That implicitly removes CUDA/HIP SKDs and would break +online compilation for those targets, thus we do *not* employ that mechanism for +those targets. Instead, end user's system is required to have a C/{cpp} toolchain +installation. + +SYCL application can also pass `--sycl-rtc-use-system-includes` option to change +the behavior for SPIR-V targets and force usage of the system toolchain. == Examples @@ -1351,56 +1374,11 @@ Some notes about the current behavior: using the conflicting hash would proceed without pre-compiled preamble support as if this option wasn't enabled. -==== `--sycl-rtc-experimental-redist-mode` +==== `--sycl-rtc-use-system-includes` -Highly experimental option to facilitate distribution of applications that use -SYCL RTC. - -While SYCL-specific header/bitcode files are embedded/distributed inside -`sycl-jit` DSO we still have a dependency on the C/C++ compiler toolchain on the -end-user system (header files only actually, but that's not too important). That -requirement is troublesome for many of the SYCL RTC customers and this option -tries to address that limitation. - -The main problem we face is that default host toolchain (MSVC/GCC on Win/Lin, -respectively) has license requirements that prohibit distribution of necessary -headers as part of SYCL redistributables. Instead, we embed LLVM's libc++ and -libc headers into the `sycl-jit` DSO and provide this option to use those -headers instead. This obviously comes with many limitations/obstacles. Using -this options results in roughly following options being passed implicitly: - -* `--nostdlibinc`, effectively excluding CUDA/HIP SDKs, unless the application - deals with that somehow. The reason we need to do that is because having - different C library implementation in includes search path doesn't work. - -* Setup virtual file system include paths to enable the use of LLVM's - `libc++`/`libc` headers embedded into `sycl-jit` DSO. - -* Define/undefine some macros found by trial-and-error to make that fragile - configuration work, at least for some examples. - -* Add implicit `-include` directive to several C headers. Again, found by - trial-and-error to make things work for some of the examples we tried. - -This also comes with lots of limitations: - -* First, huge potential for ABI mismatch between host (SYCL app) ABI and the - device code (JIT-compiled via SYCL RTC). As such, it is recommended to limit - types passing the host/device boundary to: - - - Fixed-width integer types - - `float`/`double` - - Aggregate types of the above. - - Anything else needs **very thorough** testing at the SYCL application side. - - Using SYCL/STL types in the device code only, without passing host-device - boundary is expected to work. - -* CUDA/HIP aren't expected to work out-of-the-box, see above (`--nostdlibinc`). - -* Option is **highly** experimentall, **no** support is guaranteed and is - subject to change at **any** time, including the possibility of a complete - removal. +Force usage of system C/C++ toolchain headers instead of the in-memory +distribution of LLVM's libc\+\+/libc. Option has no effect if the target +defaults to using system toolchain by default. === Known issues and limitations when the language is `sycl` diff --git a/sycl/test-e2e/KernelCompiler/exp_redist_mode.cpp b/sycl/test-e2e/KernelCompiler/in_memory_only.cpp similarity index 78% rename from sycl/test-e2e/KernelCompiler/exp_redist_mode.cpp rename to sycl/test-e2e/KernelCompiler/in_memory_only.cpp index f162655507445..601b1072cffce 100644 --- a/sycl/test-e2e/KernelCompiler/exp_redist_mode.cpp +++ b/sycl/test-e2e/KernelCompiler/in_memory_only.cpp @@ -1,9 +1,7 @@ // RUN: %{build} -o %t.out -// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out - // Make sure that debug/test-only option `--sycl-rtc-in-memory-fs-only` works -// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} not %t.out --sycl-rtc-in-memory-fs-only | FileCheck %s --check-prefix CHECK-ERROR +// RUN: %{run} not %t.out --sycl-rtc-in-memory-fs-only --sycl-rtc-use-system-includes | FileCheck %s --check-prefix CHECK-ERROR // CHECK-ERROR-LABEL: Device compilation failed // CHECK-ERROR-NEXT: Detailed information: // CHECK-ERROR: In file included from rtc_0.cpp:2: @@ -14,12 +12,9 @@ // CHECK-ERROR-NEXT: 14 | #include // CHECK-ERROR-NEXT: | ^~~~~~~~~~~~~ -// Now actually test the `--sycl-rtc-experimental-redist-mode` option: -// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out --sycl-rtc-experimental-redist-mode --sycl-rtc-in-memory-fs-only -// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out --sycl-rtc-experimental-redist-mode - -// XFAIL: target-native_cpu -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/20142 +// Extra check that our in-memory libcxx/libc headers can really work on a +// system with no C/C++ toolchain: +// RUN: %{run} %t.out --sycl-rtc-in-memory-fs-only // CUDA/HIP have SDK dependencies but exclude system includes so those aren't // satisfied. From f7a3ed567986562f92d4ca08b9b29d5b7d1a7d4d Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 13 Nov 2025 14:28:01 -0800 Subject: [PATCH 08/15] Better CMake integration --- buildbot/configure.py | 12 --- sycl-jit/jit-compiler/CMakeLists.txt | 96 +++++++++++-------- .../lib/rtc/DeviceCompilation.cpp | 13 ++- 3 files changed, 61 insertions(+), 60 deletions(-) diff --git a/buildbot/configure.py b/buildbot/configure.py index f0b197b464ace..95a05b54a0e99 100644 --- a/buildbot/configure.py +++ b/buildbot/configure.py @@ -179,12 +179,6 @@ def do_configure(args, passthrough_args): install_dir = os.path.join(abs_obj_dir, "install") - llvm_enable_runtimes = "libcxx" - # Matches `libcxx`'s requirements/platform ABI: - if platform.system() != "Windows": - llvm_enable_runtimes += ";libcxxabi;libunwind" - llvm_enable_runtimes += ";libc" - cmake_cmd = [ "cmake", "-G", @@ -201,12 +195,6 @@ def do_configure(args, passthrough_args): "-DLLVM_EXTERNAL_LIBDEVICE_SOURCE_DIR={}".format(libdevice_dir), "-DLLVM_EXTERNAL_SYCL_JIT_SOURCE_DIR={}".format(jit_dir), "-DLLVM_ENABLE_PROJECTS={}".format(llvm_enable_projects), - "-DLLVM_ENABLE_RUNTIMES={}".format(llvm_enable_runtimes), - "-DLLVM_LIBC_FULL_BUILD=ON", - "-DLLVM_LIBC_ALL_HEADERS=1", - "-DLIBC_CONFIG_PATH={}".format( - os.path.join(abs_src_dir, "sycl-jit/jit-compiler/lib/libc-config") - ), "-DSYCL_BUILD_PI_HIP_PLATFORM={}".format(sycl_build_pi_hip_platform), "-DLLVM_BUILD_TOOLS=ON", "-DLLVM_ENABLE_ZSTD={}".format(llvm_enable_zstd), diff --git a/sycl-jit/jit-compiler/CMakeLists.txt b/sycl-jit/jit-compiler/CMakeLists.txt index 9f49e76492e53..22e42a9d00607 100644 --- a/sycl-jit/jit-compiler/CMakeLists.txt +++ b/sycl-jit/jit-compiler/CMakeLists.txt @@ -28,7 +28,7 @@ endif() set(SYCL_JIT_RESOURCE_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/rtc-resources-install) -set(SYCL_JIT_PREPARE_RESOURCE_COMMANDS "") +set(SYCL_JIT_PREPARE_RESOURCE_COMMANDS) foreach(component IN LISTS SYCL_JIT_RESOURCE_INSTALL_COMPONENTS) list(APPEND SYCL_JIT_PREPARE_RESOURCE_COMMANDS COMMAND ${CMAKE_COMMAND} --install ${CMAKE_BINARY_DIR} --prefix ${SYCL_JIT_RESOURCE_INSTALL_DIR} --component "${component}" @@ -39,6 +39,60 @@ set(SYCL_JIT_RESOURCE_DEPS ${SYCL_JIT_RESOURCE_INSTALL_COMPONENTS}) # OpenCL-Headers doesn't have a corresponding build target: list(FILTER SYCL_JIT_RESOURCE_DEPS EXCLUDE REGEX "^OpenCL-Headers$") +# We also want to embed LLVM's libc/libcxx headers into resource. We don't want +# to use them through LLVM_ENABLE_RUNTIMES for a few reasons though: +# * We configure them in a way that might be incompatible with their normal +# usage +# * We don't want to include them in all/install targets +# As such, configure libc/libcxx via explicit `llvm_ExternalProject_Add` in a +# separate location. +set(SYCL_JIT_RUNTIME_PROJECTS "libc;libcxx") +if (NOT WIN32) + list(APPEND SYCL_JIT_RUNTIME_PROJECTS libcxxabi libunwind) +endif() + +# Couldn't pass -DLLVM_ENABLE_RUNTIMES= through CMAKE_ARGS +# below because semicolon is used as a separate for CMAKE_ARGS itself. +# Workaround by passing it through PASSTHROUGH_PREFIXES by saving/restoring that +# variable's original value. +set(SYCL_JIT_LLVM_ENABLE_RUNTIMES_COPY ${LLVM_ENABLE_RUNTIMES}) +set(LLVM_ENABLE_RUNTIMES ${SYCL_JIT_RUNTIME_PROJECTS}) +llvm_ExternalProject_Add(sycl-jit-extra-headers + ${CMAKE_CURRENT_SOURCE_DIR}/../../runtimes + CMAKE_ARGS -DCOMPILER_RT_BUILD_BUILTINS=Off + -DLLVM_INCLUDE_TESTS=Off + -DLLVM_DEFAULT_TARGET_TRIPLE=${LLVM_TARGET_TRIPLE} + -DLLVM_ENABLE_PROJECTS_USED=${LLVM_ENABLE_PROJECTS_USED} + -DLLVM_ENABLE_PER_TARGET_RUNTIME_DIR=${LLVM_ENABLE_PER_TARGET_RUNTIME_DIR} + -DLLVM_BUILD_TOOLS=${LLVM_BUILD_TOOLS} + -DCMAKE_C_COMPILER_WORKS=ON + -DCMAKE_CXX_COMPILER_WORKS=ON + -DCMAKE_Fortran_COMPILER_WORKS=ON + -DCMAKE_ASM_COMPILER_WORKS=ON + # libc config options: + -DLLVM_LIBC_FULL_BUILD=ON + -DLLVM_LIBC_ALL_HEADERS=1 + -DLIBC_CONFIG_PATH=${CMAKE_CURRENT_SOURCE_DIR}/lib/libc-config + # libcxx config options: + -DLIBCXX_HAS_EXTERNAL_THREAD_API=ON + TARGET_TRIPLE ${LLVM_TARGET_TRIPLE} + USE_TOOLCHAIN + PASSTHROUGH_PREFIXES LLVM_ENABLE_RUNTIMES + EXCLUDE_FROM_ALL + NO_INSTALL + ) +set(LLVM_ENABLE_RUNTIMES ${SYCL_JIT_LLVM_ENABLE_RUNTIMES_COPY}) +list(APPEND SYCL_JIT_RESOURCE_DEPS sycl-jit-extra-headers-configure) +list(APPEND SYCL_JIT_PREPARE_RESOURCE_COMMANDS + # libc + COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}/tools/sycl-jit/jit-compiler/sycl-jit-extra-headers-bins --target generate-libc-headers + COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_BINARY_DIR}/tools/sycl-jit/jit-compiler/sycl-jit-extra-headers-bins/libc/include ${SYCL_JIT_RESOURCE_INSTALL_DIR}/include/libc + + # libcxx + COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}/tools/sycl-jit/jit-compiler/sycl-jit-extra-headers-bins --target generate-cxx-headers + COMMAND ${CMAKE_COMMAND} --install ${CMAKE_BINARY_DIR}/tools/sycl-jit/jit-compiler/sycl-jit-extra-headers-bins --prefix ${SYCL_JIT_RESOURCE_INSTALL_DIR} --component cxx-headers +) + # This is very hacky and I don't quite know what I'm doing, but it's necessary # to have `resource.cpp` re-generated/re-built when some SYCL header changes. # @@ -52,43 +106,6 @@ add_custom_target(rtc-prepare-resources ${SYCL_JIT_RESOURCE_FILES} ) -set(SYCL_JIT_RUNTIME_RESOURCE_DEPS "") -set(SYCL_JIT_PREPARE_RUNTIME_RESOURCE_COMMANDS "") - -if ("libcxx" IN_LIST LLVM_ENABLE_RUNTIMES) - list(APPEND SYCL_JIT_RUNTIME_RESOURCE_DEPS runtimes-configure) - - if (WIN32) - set(SYCL_JIT_CXX_CONFIG_SITE ${CMAKE_CURRENT_BINARY_DIR}/rtc-resources-install/include/c++/v1/__config_site) - else() - set(SYCL_JIT_CXX_CONFIG_SITE ${CMAKE_CURRENT_BINARY_DIR}/rtc-resources-install/include/x86_64-unknown-linux-gnu/c++/v1/__config_site) - endif() - - list(APPEND SYCL_JIT_PREPARE_RUNTIME_RESOURCE_COMMANDS - COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}/runtimes/runtimes-bins --target generate-cxx-headers - COMMAND ${CMAKE_COMMAND} --install ${CMAKE_BINARY_DIR}/runtimes/runtimes-bins --prefix ${CMAKE_CURRENT_BINARY_DIR}/rtc-resources-install --component cxx-headers - # `` functionality isn't really supported on the device, just make - # it pass compilation. The easiest way to do that is to configure `libc++` - # to use "external threading API". - COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/rtc-resources-install/include/sycl-rtc-standalone - COMMAND sed 's/_LIBCPP_HAS_THREAD_API_EXTERNAL 0/_LIBCPP_HAS_THREAD_API_EXTERNAL 1/' ${SYCL_JIT_CXX_CONFIG_SITE} > ${CMAKE_CURRENT_BINARY_DIR}/rtc-resources-install/include/sycl-rtc-standalone/__config_site - ) -endif() - -if ("libc" IN_LIST LLVM_ENABLE_RUNTIMES) - list(APPEND SYCL_JIT_RUNTIME_RESOURCE_DEPS runtimes-configure) - - list(APPEND SYCL_JIT_PREPARE_RUNTIME_RESOURCE_COMMANDS - COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}/runtimes/runtimes-bins --target generate-libc-headers - COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_BINARY_DIR}/runtimes/runtimes-bins/libc/include ${SYCL_JIT_RESOURCE_INSTALL_DIR}/include/libc - ) -endif() - -add_custom_target(rtc-prepare-runtime-resources - DEPENDS ${SYCL_JIT_RUNTIME_RESOURCE_DEPS} - ${SYCL_JIT_PREPARE_RUNTIME_RESOURCE_COMMANDS} -) - add_custom_command( OUTPUT ${SYCL_JIT_RESOURCE_CPP} COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/utils/generate.py --toolchain-dir ${SYCL_JIT_RESOURCE_INSTALL_DIR} --output ${SYCL_JIT_RESOURCE_CPP} --prefix ${SYCL_JIT_VIRTUAL_TOOLCHAIN_ROOT} @@ -96,8 +113,6 @@ add_custom_command( rtc-prepare-resources ${SYCL_JIT_RESOURCE_DEPS} ${SYCL_JIT_RESOURCE_FILES} - rtc-prepare-runtime-resources - ${SYCL_JIT_RUNTIME_RESOURCE_DEPS} ${CMAKE_CURRENT_SOURCE_DIR}/utils/generate.py ) @@ -140,7 +155,6 @@ add_custom_command( ${SYCL_JIT_RESOURCE_CPP} ${SYCL_JIT_RESOURCE_DEPS} ${SYCL_JIT_RESOURCE_FILES} - ${SYCL_JIT_RUNTIME_RESOURCE_DEPS} ${CMAKE_CURRENT_SOURCE_DIR}/include/Resource.h ) diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index 6920aafa62ab9..3ca816717ed2a 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -313,15 +313,14 @@ class SYCLToolchain { // Must come before C/C++ headers as we're intercepting them in those // wrappers: AddInc("include/sycl/stl_wrappers"); - // Contains modified `__config_site` for libc++, need to come earlier in - // the search path. Other headers there don't seem to require any specific - // priority/search path order. + // Extra headers we provide as part of jit-compiler, e.g. + // `__external_threading` and `linux/errno.h` that are needed to make + // LLVM's libc/libcxx work. As far as I know, can be anywhere in the + // includes search path as those files aren't provide anywhere else. AddInc("include/sycl-rtc-standalone/"); #if !defined(_WIN32) - // AFAIK, it only contains original `__config_site` that we don't use (see - // above), but it seems safer to add this path anyway, in case any extra - // files are added. On Windows `LIBCXX_GENERATED_INCLUDE_TARGET_DIR` is - // off and thus we don't need it. + // On Windows `LIBCXX_GENERATED_INCLUDE_TARGET_DIR` is off and thus we + // don't need this. AddInc("include/x86_64-unknown-linux-gnu/c++/v1"); #endif // libcxx headers, must come before libc headers: From da5ee80842a06cc1a733e7f7bbcf39cee25085a5 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Fri, 14 Nov 2025 14:58:31 -0800 Subject: [PATCH 09/15] Fix typos/add comments (thanks to Nick) --- .../jit-compiler/lib/rtc/DeviceCompilation.cpp | 14 +++++++++++--- .../sycl_ext_oneapi_kernel_compiler.asciidoc | 2 +- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index 3ca816717ed2a..b482f5f6e7556 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -276,15 +276,15 @@ class SYCLToolchain { // chose `/include/libc`) and that has no support in the // clang driver, so we have to add libc headers to system include // directories manually. - // 3) However, libcxx her search path must combe *before* libc includes, + // 3) However, libcxx headers search path must come *before* libc includes, // but `-isystem` and similar options prepend the list of search paths. // As such, we can't just have the driver do part of the job and then // adjust the behavior via extra options, so we need to maintain // everything on our own. // 4) We could do everything via custom code in the clang driver, but the // location of `include/libc` is controlled in this `sycl-jit` project - // and it was slightly more convenient for me to implement it here, at - // least for the downstream implementation. + // and it was slightly more convenient to implement it here, at least + // for the downstream implementation. // 5) Once we upstream SYCL support there will be a use-case to move libc // headers installation to a separate directory (similar to libcxx), at // that time we might have support for this in the clang driver @@ -339,6 +339,11 @@ class SYCLToolchain { DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_D), "_LIBCPP_REMOVE_TRANSITIVE_INCLUDES"); #if defined(_WIN32) + // LLVM's libc implements very limited number of entrypoints on WIN, + // almost to be unusable, so nobody actually cares about using libcxx over + // LLVM libc on that platform. We only use declaration and not definition + // so we force libc to generate more header/entrypoints but it's not + // working well by default. Options below were find by trial-and-error. DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_D), "_LIBCPP_WCHAR_H_HAS_CONST_OVERLOADS"); DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_D), @@ -346,6 +351,9 @@ class SYCLToolchain { DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_U), "__ELF__"); #endif + // Similarly to Windows case above, libcxx over libc isn't fully + // supported upstream, even on Linux. Faced some errors (mostly around + // `_LIBCPP_USING_IF_EXISTS`) if the files below aren't included early: DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_include), "stdio.h"); DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_include), "wchar.h"); DAL.AddJoinedArg(nullptr, OptTable.getOption(OPT_include), "time.h"); diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc index 5dd828e088859..4e725ceb6e55c 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc @@ -1062,7 +1062,7 @@ compile the host part of the SYCL application. Technical implementation of that disables system include paths entirely (as having multiple C library implementation in include search paths would break things) and uses virtual file system containing in-memory copy of LLVM's -{libcxx}/libc headers. That implicitly removes CUDA/HIP SKDs and would break +{libcxx}/libc headers. That implicitly removes CUDA/HIP SDKs and would break online compilation for those targets, thus we do *not* employ that mechanism for those targets. Instead, end user's system is required to have a C/{cpp} toolchain installation. From fab3b51edae42a080b6fa88e2250ec573f43077d Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Fri, 21 Nov 2025 10:27:38 -0800 Subject: [PATCH 10/15] Update extension --- .../sycl_ext_oneapi_kernel_compiler.asciidoc | 67 ++++++++++++------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc index 4e725ceb6e55c..f2fd34a8980e9 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc @@ -1047,29 +1047,6 @@ sycl::kernel k_float = kb.ext_oneapi_get_kernel("bartmpl"); sycl::kernel k_int = kb.ext_oneapi_get_kernel("bartmpl"); ---- -=== Restrictions on the source code when language is `sycl` - -Currently, SYCL applications are linked with MSVC/GNU C/{cpp} runtimes on Win/Lin -respectively. Unfortunately, we cannot distribute those headers together with -SYCL applications using kernel compiler due to license restrictions. Instead, we -distribute LLVM's {libcxx}/libc headers as part of the kernel compiler and -default to using them for SPIR-V based targets. That results in a restriction -not to pass any data that transitively uses any types defined in C/{cpp} headers -(with the exception of `[u]intN_t` type aliases) as those might be -ABI-incompatible between headers used to compiler device code and those used to -compile the host part of the SYCL application. - -Technical implementation of that disables system include paths entirely (as -having multiple C library implementation in include search paths would break -things) and uses virtual file system containing in-memory copy of LLVM's -{libcxx}/libc headers. That implicitly removes CUDA/HIP SDKs and would break -online compilation for those targets, thus we do *not* employ that mechanism for -those targets. Instead, end user's system is required to have a C/{cpp} toolchain -installation. - -SYCL application can also pass `--sycl-rtc-use-system-includes` option to change -the behavior for SPIR-V targets and force usage of the system toolchain. - == Examples === Simple example @@ -1376,12 +1353,50 @@ Some notes about the current behavior: ==== `--sycl-rtc-use-system-includes` -Force usage of system C/C++ toolchain headers instead of the in-memory -distribution of LLVM's libc\+\+/libc. Option has no effect if the target -defaults to using system toolchain by default. +Force usage of system C/C++ headers instead of the self-containted versions. +Option has no effect if the target defaults to using system headers by +default. === Known issues and limitations when the language is `sycl` +==== C/{cpp} header files + +By default, the SYCL runtime compiler uses a self-contained set of C and {cpp} +header files when compiling kernels in the `sycl` language (unless target has +other dependencies in system includes). This means that SYCL applications using +this feature can be run even on a system that does not have these headers +installed. However, the self contained header files may not be the same as the C +and {cpp} header files that were used to build the host part of the SYCL +application. As a result, there are additional limitations around data that is +shared between the host part of the application and the kernel. These +limitations apply to arguments that are passed to the kernel and also to data +shared through USM or through accessors. Additionally, other header files might +be installed in the same location as system C headers (e.g., `/usr/include/`). +Those will not be available as well. + +Types that are defined by the compiler (e.g. fundamental types like `int` and +`float`) are guaranteed to have the same representation and alignment +requirements in both the host compiler and in the compiler used to compile the +kernel. Therefore data using these types can be safely shared. However, types +defined by the C or {cpp} library (e.g. types in the `std` namespace) are not +guaranteed to be the same, so data defined using these types cannot be safely +shared. There are a few specific exceptions to this limitation. The following C +/ {cpp} types are guaranteed to have the same representation and alignment +requirements, so data defined as these types can be safely shared: + +* The following fixed-width integer types: `int8_t`, `int16_t`, `int32_t`, +`int64_t`, `uint8_t`, `uint16_t`, `uint32_t`, `uint64_t`. +* The types `size_t` and `ptrdiff_t`. + +Array and class types defined by your application are safe to share so long as +the element types are safe. Pointer types are safe to share so long as the +pointed-at type is safe. Enumeration types defined by your application are also +safe. + +It is also possible to use the system C and C++ headers instead of the +self-contained versions. See the description of the +`--sycl-rtc-use-system-includes` option for more details. + ==== Changing the compiler action or output As the {dpcpp} frontend is integrated tightly in the runtime compilation From d76c4df65cbb7f6876e073dd74e76b05836926a8 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Fri, 21 Nov 2025 13:38:56 -0800 Subject: [PATCH 11/15] Add layout test --- .../sycl_host_device_data_layout.cpp | 162 ++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp diff --git a/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp b/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp new file mode 100644 index 0000000000000..b20910603c918 --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp @@ -0,0 +1,162 @@ +// RUN: %{build} -o %t.out +// RUN: %if hip %{ env SYCL_JIT_AMDGCN_PTX_TARGET_CPU=%{amd_arch} %} %{run} %t.out + +// UNSUPPORTED: target-native_cpu +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/20142 + +#include +#include +#include + +#include +#include +#include +#include +#include + +#define STRINGIFY(x) #x +#define EXPAND_AND_STRINGIFY(x) STRINGIFY(x) + +namespace syclexp = sycl::ext::oneapi::experimental; +int main() { + sycl::queue q; + std::string src = R"""( +#include +#include + +#include +#include +#include +#include +#include + +namespace syclext = sycl::ext::oneapi; +namespace syclexp = sycl::ext::oneapi::experimental; + +using mint3 = sycl::marray; + +extern "C" +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel)) +void foo(TYPE *in, TYPE *out, size_t *align_out, size_t *size_out, bool *equal_out) { + *out = TYPE{INIT}; + *align_out = alignof(TYPE); + *size_out = sizeof(TYPE); + auto Equal = [](const auto &lhs, const auto &rhs) { + using T = std::decay_t; + if constexpr (sycl::detail::is_vec_v || sycl::detail::is_marray_v) { + if (lhs.size() != rhs.size()) + return false; + + for (size_t i = 0; i < lhs.size(); ++i) + if (lhs[i] != rhs[i]) + return false; + + return true; + } else { + return lhs == rhs; + } + }; + *equal_out = Equal(*in, *out); +} +)"""; + auto kb_src = syclexp::create_kernel_bundle_from_source( + q.get_context(), syclexp::source_language::sycl, src); + + auto *align = sycl::malloc_shared(1, q); + auto *size = sycl::malloc_shared(1, q); + auto *equal = sycl::malloc_shared(1, q); + + auto Test = [&](auto val, auto type_str, auto init_str) { + using namespace std::literals::string_literals; + + using type = decltype(val); + auto kb_exe = syclexp::build( + kb_src, + syclexp::properties{syclexp::build_options{std::vector{ + "-DTYPE="s + type_str, "-DINIT="s + init_str}}}); + + sycl::kernel krn = kb_exe.ext_oneapi_get_kernel("foo"); + auto *host = sycl::malloc_shared(1, q); + *host = val; + auto *device = sycl::malloc_shared(1, q); + + q.submit([&](sycl::handler &cgh) { + cgh.set_args(host, device, align, size, equal); + cgh.single_task(krn); + }).wait(); + auto Equal = [](const auto &lhs, const auto &rhs) { + using T = std::decay_t; + if constexpr (sycl::detail::is_vec_v || sycl::detail::is_marray_v) { + if (lhs.size() != rhs.size()) + return false; + + for (size_t i = 0; i < lhs.size(); ++i) + if (lhs[i] != rhs[i]) + return false; + + return true; + } else { + return lhs == rhs; + } + }; + assert(Equal(*host, *device)); + assert(*align == alignof(type)); + assert(*size == sizeof(type)); + assert(*equal == true); + sycl::free(host, q); + sycl::free(device, q); + }; + +#define TEST(TYPE, INIT) \ + Test(TYPE{INIT}, EXPAND_AND_STRINGIFY(TYPE), EXPAND_AND_STRINGIFY(INIT)); +#define TEST2(TYPE, INIT0, INIT1) \ + Test(TYPE{INIT0, INIT1}, EXPAND_AND_STRINGIFY(TYPE), \ + EXPAND_AND_STRINGIFY(INIT0) ", " EXPAND_AND_STRINGIFY(INIT1)); +#define TEST3(TYPE, INIT0, INIT1, INIT2) \ + Test(TYPE{INIT0, INIT1, INIT2}, EXPAND_AND_STRINGIFY(TYPE), \ + EXPAND_AND_STRINGIFY(INIT0) ", " EXPAND_AND_STRINGIFY( \ + INIT1) ", " EXPAND_AND_STRINGIFY(INIT2)); + + TEST(size_t, 0x1122334455667788) + TEST(char, 0x12) + + TEST(int8_t, 0x12) + TEST(int8_t, -0x12) + TEST(uint8_t, 0x12) + + TEST(int16_t, 0x1234) + TEST(int16_t, -0x1234) + TEST(uint16_t, 0x1234) + + TEST(int32_t, 0x12345678) + TEST(int32_t, -0x12345678) + TEST(uint32_t, 0x12345678) + + TEST(int64_t, 0x1122334455667788) + TEST(int64_t, -0x1122334455667788) + TEST(uint64_t, 0x1122334455667788) + + TEST(size_t, 0x1122334455667788) + TEST(ptrdiff_t, 0x1122334455667788) + + TEST(float, 42.0f) + TEST(double, 42.0) + + TEST(sycl::half, 42.0f) + TEST(sycl::ext::oneapi::bfloat16, 42.0f) + + TEST(sycl::range<1>, 0x1122334455667788) + TEST2(sycl::range<2>, 0x1122334455667788, 0x1223344556677889) + TEST3(sycl::range<3>, 0x1122334455667788, 0x1223344556677889, + 0x132435465768798A) + + TEST2(sycl::short2, 0x1234, 0x2345) + TEST3(sycl::short3, 0x1234, 0x2345, 0x3456) + + using mint3 = sycl::marray; + TEST3(mint3, 0x1234, 0x2345, 0x3456) + + sycl::free(align, q); + sycl::free(size, q); + sycl::free(equal, q); +} From 37298362b3f43c540acde2ae936958de9338556a Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Mon, 24 Nov 2025 13:50:18 -0800 Subject: [PATCH 12/15] Partially address review feedback --- .../sycl_ext_oneapi_kernel_compiler.asciidoc | 76 +++++++++---------- .../sycl_host_device_data_layout.cpp | 50 +++++++++++- 2 files changed, 87 insertions(+), 39 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc index f2fd34a8980e9..f822d302bdb92 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_compiler.asciidoc @@ -1199,6 +1199,44 @@ this extension. == Non-normative implementation notes for {dpcpp} +=== C/{cpp} header files and limitations when the language is sycl + +By default, the SYCL runtime compiler uses a self-contained set of C and {cpp} +header files when compiling kernels in the `sycl` language. This means that SYCL +applications using this feature can be run even on a system that does not have +these headers installed. However, the self contained header files may not be the +same as the C and {cpp} header files that were used to build the host part of +the SYCL application. As a result, there are additional limitations around data +that is shared between the host part of the application and the kernel. These +limitations apply to arguments that are passed to the kernel and also to data +shared through USM or through accessors. Additionally, other header files might +be installed in the same location as system C headers (e.g., `/usr/include/`). +Those will not be available as well. + +Types that are defined by the compiler (e.g. fundamental types like `int` and +`float`) are guaranteed to have the same representation and alignment +requirements in both the host compiler and in the compiler used to compile the +kernel. Therefore data using these types can be safely shared. However, types +defined by the C or {cpp} library (e.g. types in the `std` namespace) are not +guaranteed to be the same, so data defined using these types cannot be safely +shared. There are a few specific exceptions to this limitation. The following C +/ {cpp} types are guaranteed to have the same representation and alignment +requirements, so data defined as these types can be safely shared: + +* The following fixed-width integer types: `int8_t`, `int16_t`, `int32_t`, +`int64_t`, `uint8_t`, `uint16_t`, `uint32_t`, `uint64_t`. +* The types `size_t` and `ptrdiff_t`. + +Array and class types defined by your application are safe to share so long as +the element types are safe. Pointer types are safe to share so long as the +pointed-at type is safe. Enumeration types defined by your application are also +safe. + +It is also possible to use the system C and C++ headers instead of the +self-contained versions. See the description of the +`--sycl-rtc-use-system-includes` option for more details. + + === Supported `build_options` when the language is `sycl` The SYCL runtime compiler supports the following {dpcpp} options to be passed in @@ -1359,44 +1397,6 @@ default. === Known issues and limitations when the language is `sycl` -==== C/{cpp} header files - -By default, the SYCL runtime compiler uses a self-contained set of C and {cpp} -header files when compiling kernels in the `sycl` language (unless target has -other dependencies in system includes). This means that SYCL applications using -this feature can be run even on a system that does not have these headers -installed. However, the self contained header files may not be the same as the C -and {cpp} header files that were used to build the host part of the SYCL -application. As a result, there are additional limitations around data that is -shared between the host part of the application and the kernel. These -limitations apply to arguments that are passed to the kernel and also to data -shared through USM or through accessors. Additionally, other header files might -be installed in the same location as system C headers (e.g., `/usr/include/`). -Those will not be available as well. - -Types that are defined by the compiler (e.g. fundamental types like `int` and -`float`) are guaranteed to have the same representation and alignment -requirements in both the host compiler and in the compiler used to compile the -kernel. Therefore data using these types can be safely shared. However, types -defined by the C or {cpp} library (e.g. types in the `std` namespace) are not -guaranteed to be the same, so data defined using these types cannot be safely -shared. There are a few specific exceptions to this limitation. The following C -/ {cpp} types are guaranteed to have the same representation and alignment -requirements, so data defined as these types can be safely shared: - -* The following fixed-width integer types: `int8_t`, `int16_t`, `int32_t`, -`int64_t`, `uint8_t`, `uint16_t`, `uint32_t`, `uint64_t`. -* The types `size_t` and `ptrdiff_t`. - -Array and class types defined by your application are safe to share so long as -the element types are safe. Pointer types are safe to share so long as the -pointed-at type is safe. Enumeration types defined by your application are also -safe. - -It is also possible to use the system C and C++ headers instead of the -self-contained versions. See the description of the -`--sycl-rtc-use-system-includes` option for more details. - ==== Changing the compiler action or output As the {dpcpp} frontend is integrated tightly in the runtime compilation diff --git a/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp b/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp index b20910603c918..7e654234ea9d4 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp @@ -17,6 +17,22 @@ #define STRINGIFY(x) #x #define EXPAND_AND_STRINGIFY(x) STRINGIFY(x) +// Needs to be duplicated between host/device. @{ + +// Comma would make preprocessor macro trickier. +using mint3 = sycl::marray; + +enum E { + V0 = 0x12345689, +}; +static_assert(sizeof(E) == 4); +enum class ScopedE { + ScopedV0 = 0x12345689, +}; +static_assert(sizeof(ScopedE) == 4); + +// }@ + namespace syclexp = sycl::ext::oneapi::experimental; int main() { sycl::queue q; @@ -35,6 +51,15 @@ namespace syclexp = sycl::ext::oneapi::experimental; using mint3 = sycl::marray; +enum E { + V0 = 0x12345689, +}; +static_assert(sizeof(E) == 4); +enum class ScopedE { + ScopedV0 = 0x12345689, +}; +static_assert(sizeof(ScopedE) == 4); + extern "C" SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel)) void foo(TYPE *in, TYPE *out, size_t *align_out, size_t *size_out, bool *equal_out) { @@ -150,12 +175,35 @@ void foo(TYPE *in, TYPE *out, size_t *align_out, size_t *size_out, bool *equal_o TEST3(sycl::range<3>, 0x1122334455667788, 0x1223344556677889, 0x132435465768798A) + TEST(sycl::id<1>, 0x1122334455667788) + TEST2(sycl::id<2>, 0x1122334455667788, 0x1223344556677889) + TEST3(sycl::id<3>, 0x1122334455667788, 0x1223344556677889, + 0x132435465768798A) + + // Making these work with macros would be too much work: + Test(sycl::nd_range<1>{{0x1122334455667788}, {0x1223344556677889}}, + "sycl::nd_range<1>", "{0x1122334455667788}, {0x1223344556677889}"); + Test(sycl::nd_range<2>{{0x1122334455667788, 0x2132435465768798}, + {0x1223344556677889, 0x2233445586778899}}, + "sycl::nd_range<2>", + "{0x1122334455667788, 0x2132435465768798}, {0x1223344556677889, " + "0x2233445586778899}"); + Test( + sycl::nd_range<3>{ + {0x1122334455667788, 0x2132435465768798, 0x31525364758697A8}, + {0x1223344556677889, 0x2233445586778899, 0x32435465768798A9}}, + "sycl::nd_range<3>", + "{0x1122334455667788, 0x2132435465768798, 0x31525364758697A8}, " + "{0x1223344556677889, 0x2233445586778899, 0x32435465768798A9}"); + TEST2(sycl::short2, 0x1234, 0x2345) TEST3(sycl::short3, 0x1234, 0x2345, 0x3456) - using mint3 = sycl::marray; TEST3(mint3, 0x1234, 0x2345, 0x3456) + TEST(E, V0) + TEST(ScopedE, ScopedE::ScopedV0) + sycl::free(align, q); sycl::free(size, q); sycl::free(equal, q); From 5f88bca2dd74ab8e30153bbc18060d60160dc68b Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Mon, 24 Nov 2025 14:04:06 -0800 Subject: [PATCH 13/15] clang-format --- sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp b/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp index 7e654234ea9d4..999d7b1835247 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp @@ -177,8 +177,7 @@ void foo(TYPE *in, TYPE *out, size_t *align_out, size_t *size_out, bool *equal_o TEST(sycl::id<1>, 0x1122334455667788) TEST2(sycl::id<2>, 0x1122334455667788, 0x1223344556677889) - TEST3(sycl::id<3>, 0x1122334455667788, 0x1223344556677889, - 0x132435465768798A) + TEST3(sycl::id<3>, 0x1122334455667788, 0x1223344556677889, 0x132435465768798A) // Making these work with macros would be too much work: Test(sycl::nd_range<1>{{0x1122334455667788}, {0x1223344556677889}}, From 1067a089fe234300ba855a2a4ba1a106c4f3150a Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Mon, 24 Nov 2025 15:40:52 -0800 Subject: [PATCH 14/15] fp64 is optional --- .../test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp b/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp index 999d7b1835247..159ed3c1679aa 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp @@ -165,7 +165,10 @@ void foo(TYPE *in, TYPE *out, size_t *align_out, size_t *size_out, bool *equal_o TEST(ptrdiff_t, 0x1122334455667788) TEST(float, 42.0f) - TEST(double, 42.0) + if (q.get_device().has(sycl::aspect::fp64)) { + TEST(double, 42.0) + } + TEST(sycl::half, 42.0f) TEST(sycl::ext::oneapi::bfloat16, 42.0f) From 8a39ac65c43234398c54d0f9114bdedb55fb4075 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Tue, 25 Nov 2025 10:24:38 -0800 Subject: [PATCH 15/15] clang-format --- sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp b/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp index 159ed3c1679aa..b1a62f2e1ca3b 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_host_device_data_layout.cpp @@ -169,7 +169,6 @@ void foo(TYPE *in, TYPE *out, size_t *align_out, size_t *size_out, bool *equal_o TEST(double, 42.0) } - TEST(sycl::half, 42.0f) TEST(sycl::ext::oneapi::bfloat16, 42.0f)