From eed2d03e611a7848f896445f399a004fd53c725a Mon Sep 17 00:00:00 2001 From: Udit Agarwal Date: Mon, 28 Oct 2024 10:23:50 -0700 Subject: [PATCH 1/9] [SYCL] Repurpose SYCL_CACHE_TRACE to enable fine-grained tracing of SYCL caches (#15822) Currently, we use SYCL_CACHE_TRACE for events in persistent cache only. This PR repurposes SYCL_CACHE_TRACE to also enable tracing of in-memory cache and kernel_compiler. After this change, SYCL_CACHE_TRACE will accept the following bit-masks: | Bit-mask | Corresponding cache tracing | | ------ | ----------- | | 0x01 | Enable tracing of persistent cache | | 0x02 | Enable tracing of in-memory cache | | 0x04 | Enable tracing of `kernel_compiler` cache | Any valid combination of the above bit-masks can be used to enable/disable tracing of the corresponding caches. --------- Co-authored-by: Steffen Larsen --- sycl/doc/EnvironmentVariables.md | 13 ++- sycl/source/detail/config.def | 2 +- sycl/source/detail/config.hpp | 58 ++++++++++++ .../detail/persistent_device_code_cache.hpp | 7 +- .../KernelAndProgram/test_cache_jit_aot.cpp | 4 +- sycl/unittests/config/ConfigTests.cpp | 92 +++++++++++++++++++ 6 files changed, 169 insertions(+), 7 deletions(-) diff --git a/sycl/doc/EnvironmentVariables.md b/sycl/doc/EnvironmentVariables.md index 403d21301cbde..3172bc2446aee 100644 --- a/sycl/doc/EnvironmentVariables.md +++ b/sycl/doc/EnvironmentVariables.md @@ -210,7 +210,7 @@ variables in production code. | `SYCL_USE_KERNEL_SPV` | Path to the SPIR-V binary | Load device image from the specified file. If runtime is unable to read the file, `sycl::runtime_error` exception is thrown. The image is assumed to have been created using the `-fno-sycl-dead-args-optimization` option. | | `SYCL_DUMP_IMAGES` | Any(\*) | Dump device image binaries to file. Control has no effect if `SYCL_USE_KERNEL_SPV` is set. | | `SYCL_HOST_UNIFIED_MEMORY` | Integer | Enforce host unified memory support or lack of it for the execution graph builder. If set to 0, it is enforced as not supported by all devices. If set to 1, it is enforced as supported by all devices. | -| `SYCL_CACHE_TRACE` | Any(\*) | If the variable is set, messages are sent to std::cerr when caching events or non-blocking failures happen (e.g. unable to access cache item file). | +| `SYCL_CACHE_TRACE` | Described [below](#sycl_cache_trace-options). | Enable tracing for different SYCL and `kernel_compiler` caches. | | `SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE` | Any(\*) | Enables tracing of `parallel_for` invocations with rounded-up ranges. | | `SYCL_PI_SUPPRESS_ERROR_MESSAGE` | Any(\*) | Suppress printing of error message, only used for CI in order not to interrupt errors generated by underlying toolchains; note that the variable only modifies the printing of the error message (error value, name, description and location), the handling of error return code and aborting/throwing behaviour remains unchanged. | | `SYCL_JIT_COMPILER_DEBUG` | Any(\*) | Passes can specify their own debug types, `sycl-spec-const-materializer` enables debug output generation in specialization constants materialization pass. | @@ -245,6 +245,17 @@ Supported tracing levels are in the table below | 2 | Enable tracing of the UR calls | | -1 | Enable all levels of tracing | +### `SYCL_CACHE_TRACE` Options + +`SYCL_CACHE_TRACE` accepts a bit-mask to control the tracing of different SYCL caches. The input value is parsed as an integer and the following bit-masks are used to determine the tracing behavior: +| Bit-mask | Corresponding cache tracing | +| ------ | ----------- | +| 0x01 | Enable tracing of persistent cache | +| 0x02 | Enable tracing of in-memory cache | +| 0x04 | Enable tracing of `kernel_compiler` cache | + +Any valid combination of the above bit-masks can be used to enable/disable tracing of the corresponding caches. If the input value is not 0 and not a valid number, the disk cache tracing will be enabled (deprecated behavior). +The default value is 0 and no tracing is enabled. ## Debugging variables for Level Zero Plugin diff --git a/sycl/source/detail/config.def b/sycl/source/detail/config.def index 5ffd52a319bdb..9172df2a1497b 100644 --- a/sycl/source/detail/config.def +++ b/sycl/source/detail/config.def @@ -26,7 +26,7 @@ CONFIG(SYCL_PROGRAM_APPEND_COMPILE_OPTIONS, 64, __SYCL_PROGRAM_APPEND_COMPILE_OP CONFIG(SYCL_HOST_UNIFIED_MEMORY, 1, __SYCL_HOST_UNIFIED_MEMORY) // 260 (Windows limit) - 12 (filename) - 84 (cache directory structure) CONFIG(SYCL_CACHE_DIR, 164, __SYCL_CACHE_DIR) -CONFIG(SYCL_CACHE_TRACE, 1, __SYCL_CACHE_TRACE) +CONFIG(SYCL_CACHE_TRACE, 4, __SYCL_CACHE_TRACE) CONFIG(SYCL_CACHE_DISABLE_PERSISTENT, 1, __SYCL_CACHE_DISABLE_PERSISTENT) CONFIG(SYCL_CACHE_PERSISTENT, 1, __SYCL_CACHE_PERSISTENT) CONFIG(SYCL_CACHE_EVICTION_DISABLE, 1, __SYCL_CACHE_EVICTION_DISABLE) diff --git a/sycl/source/detail/config.hpp b/sycl/source/detail/config.hpp index 599f21f02e1ce..49bef4fbb6cf1 100644 --- a/sycl/source/detail/config.hpp +++ b/sycl/source/detail/config.hpp @@ -698,6 +698,64 @@ template <> class SYCLConfig { } }; +// SYCL_CACHE_TRACE accepts a bit-mask to control the tracing of +// different SYCL caches. The input value is parsed as an integer and +// the following bit-masks is used to determine the tracing behavior: +// 0x01 - trace disk cache +// 0x02 - trace in-memory cache +// 0x04 - trace kernel_compiler cache +// Any valid combination of the above bit-masks can be used to enable/disable +// tracing of the corresponding caches. If the input value is not null and +// not a valid number, the disk cache tracing will be enabled (depreciated +// behavior). The default value is 0 and no tracing is enabled. +template <> class SYCLConfig { + using BaseT = SYCLConfigBase; + enum TraceBitmask { DiskCache = 1, InMemCache = 2, KernelCompiler = 4 }; + +public: + static unsigned int get() { return getCachedValue(); } + static void reset() { (void)getCachedValue(true); } + static bool isTraceDiskCache() { + return getCachedValue() & TraceBitmask::DiskCache; + } + static bool isTraceInMemCache() { + return getCachedValue() & TraceBitmask::InMemCache; + } + static bool isTraceKernelCompiler() { + return getCachedValue() & TraceBitmask::KernelCompiler; + } + +private: + static unsigned int getCachedValue(bool ResetCache = false) { + const auto Parser = []() { + const char *ValStr = BaseT::getRawValue(); + int intVal = 0; + + if (ValStr) { + try { + intVal = std::stoi(ValStr); + } catch (...) { + // If the value is not null and not a number, it is considered + // to enable disk cache tracing. This is the legacy behavior. + intVal = 1; + } + } + + // Legacy behavior. + if (intVal > 7) + intVal = 1; + + return intVal; + }; + + static unsigned int Level = Parser(); + if (ResetCache) + Level = Parser(); + + return Level; + } +}; + #undef INVALID_CONFIG_EXCEPTION } // namespace detail diff --git a/sycl/source/detail/persistent_device_code_cache.hpp b/sycl/source/detail/persistent_device_code_cache.hpp index e2b3c8f72c4da..868c247f28903 100644 --- a/sycl/source/detail/persistent_device_code_cache.hpp +++ b/sycl/source/detail/persistent_device_code_cache.hpp @@ -190,9 +190,10 @@ class PersistentDeviceCodeCache { /* Sends message to std:cerr stream when SYCL_CACHE_TRACE environemnt is set*/ static void trace(const std::string &msg) { - static const char *TraceEnabled = SYCLConfig::get(); - if (TraceEnabled) - std::cerr << "*** Code caching: " << msg << std::endl; + static const bool traceEnabled = + SYCLConfig::isTraceDiskCache(); + if (traceEnabled) + std::cerr << "[Persistent Cache]: " << msg << std::endl; } }; } // namespace detail diff --git a/sycl/test-e2e/KernelAndProgram/test_cache_jit_aot.cpp b/sycl/test-e2e/KernelAndProgram/test_cache_jit_aot.cpp index 9f0941e50987b..bea437200bdba 100644 --- a/sycl/test-e2e/KernelAndProgram/test_cache_jit_aot.cpp +++ b/sycl/test-e2e/KernelAndProgram/test_cache_jit_aot.cpp @@ -66,8 +66,8 @@ // RUN: %{cache_vars} %{run-unfiltered-devices} %t.out 2>&1 | FileCheck %s --check-prefixes RESULT1 // ****************************** -// CHECK-CACHE-WRITE: Code caching: device binary has been cached -// CHECK-CACHE-READ: Code caching: using cached device binary +// CHECK-CACHE-WRITE: [Persistent Cache]: device binary has been cached +// CHECK-CACHE-READ: [Persistent Cache]: using cached device binary // RESULT1: Result (0): 1 // RESULT1: Result (1): 1 diff --git a/sycl/unittests/config/ConfigTests.cpp b/sycl/unittests/config/ConfigTests.cpp index 50eaf2f8816e2..3022ccbd52e65 100644 --- a/sycl/unittests/config/ConfigTests.cpp +++ b/sycl/unittests/config/ConfigTests.cpp @@ -232,3 +232,95 @@ TEST(ConfigTests, CheckConfigProcessing) { sycl::detail::SYCLConfig< sycl::detail::SYCL_PRINT_EXECUTION_GRAPH>::get()); } + +// SYCL_CACHE_TRACE accepts a bit-mask to control the tracing of +// different SYCL caches. The input value is parsed as an integer and +// the following bit-masks is used to determine the tracing behavior: +// 0x01 - trace disk cache +// 0x02 - trace in-memory cache +// 0x04 - trace kernel_compiler cache +// Any valid combination of the above bit-masks can be used to enable/disable +// tracing of the corresponding caches. If the input value is not null and +// not a valid number, the disk cache tracing will be enabled (depreciated +// behavior). The default value is 0 and no tracing is enabled. +using namespace sycl::detail; +TEST(ConfigTests, CheckSyclCacheTraceTest) { + + // Lambda to test parsing of SYCL_CACHE_TRACE + auto TestConfig = [](int expectedValue, int expectedDiskCache, + int expectedInMemCache, int expectedKernelCompiler) { + EXPECT_EQ(static_cast(expectedValue), + SYCLConfig::get()); + + EXPECT_EQ( + expectedDiskCache, + static_cast( + sycl::detail::SYCLConfig::isTraceDiskCache())); + EXPECT_EQ( + expectedInMemCache, + static_cast( + sycl::detail::SYCLConfig::isTraceInMemCache())); + EXPECT_EQ(expectedKernelCompiler, + static_cast(sycl::detail::SYCLConfig< + SYCL_CACHE_TRACE>::isTraceKernelCompiler())); + }; + + // Lambda to set SYCL_CACHE_TRACE + auto SetSyclCacheTraceEnv = [](const char *value) { +#ifdef _WIN32 + _putenv_s("SYCL_CACHE_TRACE", value); +#else + setenv("SYCL_CACHE_TRACE", value, 1); +#endif + }; + + SetSyclCacheTraceEnv("0"); + sycl::detail::readConfig(true); + TestConfig(0, 0, 0, 0); + + SetSyclCacheTraceEnv("1"); + sycl::detail::SYCLConfig::reset(); + TestConfig(1, 1, 0, 0); + + SetSyclCacheTraceEnv("2"); + sycl::detail::SYCLConfig::reset(); + TestConfig(2, 0, 1, 0); + + SetSyclCacheTraceEnv("3"); + sycl::detail::SYCLConfig::reset(); + TestConfig(3, 1, 1, 0); + + SetSyclCacheTraceEnv("4"); + sycl::detail::SYCLConfig::reset(); + TestConfig(4, 0, 0, 1); + + SetSyclCacheTraceEnv("5"); + sycl::detail::SYCLConfig::reset(); + TestConfig(5, 1, 0, 1); + + SetSyclCacheTraceEnv("6"); + sycl::detail::SYCLConfig::reset(); + TestConfig(6, 0, 1, 1); + + SetSyclCacheTraceEnv("7"); + sycl::detail::SYCLConfig::reset(); + TestConfig(7, 1, 1, 1); + + SetSyclCacheTraceEnv("8"); + sycl::detail::SYCLConfig::reset(); + TestConfig(1, 1, 0, 0); + + // Set random non-null value. It should default to 1. + SetSyclCacheTraceEnv("random"); + sycl::detail::SYCLConfig::reset(); + TestConfig(1, 1, 0, 0); + + // When SYCL_CACHE_TRACE is not set, it should default to 0. +#ifdef _WIN32 + _putenv_s("SYCL_CACHE_TRACE", ""); +#else + unsetenv("SYCL_CACHE_TRACE"); +#endif + sycl::detail::SYCLConfig::reset(); + TestConfig(0, 0, 0, 0); +} From 475adaa746b2da4e96d129fa963e4d317210e54e Mon Sep 17 00:00:00 2001 From: "Neil R. Spruit" Date: Mon, 28 Oct 2024 14:57:06 -0700 Subject: [PATCH 2/9] [L0] Enable Sysman Thru Env by default and have zesInit be optional (#15894) -pre-commit PR for https://github.com/oneapi-src/unified-runtime/pull/2242 Signed-off-by: Neil R. Spruit --- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 7a8a290d9fcca..060f402ee7c35 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,5 +1,7 @@ -# commit 66ba7970a6badf781226b75a98c9585ef30ea93a -# Author: Maosu Zhao -# Date: Mon Oct 28 18:39:57 2024 +0800 -# [DeviceSanitizer] Add a report flag to LaunchInfo (#2069) -set(UNIFIED_RUNTIME_TAG 66ba7970a6badf781226b75a98c9585ef30ea93a) +# commit dbd168cbed2d2590b47904728cd5762f1c2f4c6b (HEAD, origin/main, origin/HEAD) +# Merge: 694c1b9a 27ad3f7d +# Author: Piotr Balcer +# Date: Mon Oct 28 16:29:45 2024 +0100 +# Merge pull request #2242 from nrspruit/sysman_env_disable +# [L0] Enable Sysman Thru Env by default and have zesInit be optional +set(UNIFIED_RUNTIME_TAG dbd168cbed2d2590b47904728cd5762f1c2f4c6b) From 3a8cfdfd5fc4b72d242f671d6f4d68043bd7b016 Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Mon, 28 Oct 2024 20:09:17 -0700 Subject: [PATCH 3/9] [NFC][SYCL] Simplify properties' definitions in `virtual_functions.hpp` (#15900) We've been using simpler way to define them since https://github.com/intel/llvm/pull/12831 but somehow that extension missed that. --- .../oneapi/experimental/virtual_functions.hpp | 22 ++++--------------- 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/virtual_functions.hpp b/sycl/include/sycl/ext/oneapi/experimental/virtual_functions.hpp index a3100dc1a2657..4e1d0e13eb623 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/virtual_functions.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/virtual_functions.hpp @@ -6,7 +6,8 @@ namespace sycl { inline namespace _V1 { namespace ext::oneapi::experimental { -struct indirectly_callable_key { +struct indirectly_callable_key + : detail::compile_time_property_key { template using value_t = sycl::ext::oneapi::experimental::property_value indirectly_callable; template inline constexpr indirectly_callable_key::value_t indirectly_callable_in; -struct calls_indirectly_key { +struct calls_indirectly_key + : detail::compile_time_property_key { template using value_t = sycl::ext::oneapi::experimental::property_value inline constexpr calls_indirectly_key::value_t assume_indirect_calls_to; -template <> struct is_property_key : std::true_type {}; -template <> struct is_property_key : std::true_type {}; - namespace detail { -template <> -struct IsCompileTimeProperty : std::true_type {}; -template <> -struct IsCompileTimeProperty : std::true_type {}; - -template <> struct PropertyToKind { - static constexpr PropKind Kind = PropKind::IndirectlyCallable; -}; - -template <> struct PropertyToKind { - static constexpr PropKind Kind = PropKind::CallsIndirectly; -}; - template struct PropertyMetaInfo> { static constexpr const char *name = "indirectly-callable"; From 892cbc52edfd37d050801dedcf7ae422d3d14a7b Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Mon, 28 Oct 2024 20:10:36 -0700 Subject: [PATCH 4/9] [NFCI][SYCL] Move `IsProperty[*Value]` to `property[_value].hpp` (#15901) This should make it easier to follow the logic and understand if template parameters (like `PropertyT`) refer to keys or values. --- .../sycl/ext/oneapi/properties/property.hpp | 7 +++++ .../ext/oneapi/properties/property_utils.hpp | 27 ++----------------- .../ext/oneapi/properties/property_value.hpp | 18 ++++++++++--- .../include_deps/sycl_detail_core.hpp.cpp | 2 +- 4 files changed, 25 insertions(+), 29 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index 33228130ec36d..d1797b08ce324 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -264,6 +264,13 @@ struct IsCompileTimeProperty std::is_base_of_v && std::is_base_of_v> {}; +// Checks if a type is either a runtime property or if it is a compile-time +// property +template struct IsProperty { + static constexpr bool value = + IsRuntimeProperty::value || IsCompileTimeProperty::value; +}; + // Trait for property compile-time meta names and values. template struct PropertyMetaInfo { // Some properties don't have meaningful compile-time values. diff --git a/sycl/include/sycl/ext/oneapi/properties/property_utils.hpp b/sycl/include/sycl/ext/oneapi/properties/property_utils.hpp index 804dc6f4279cd..3280d77aa6258 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property_utils.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property_utils.hpp @@ -12,7 +12,8 @@ #include // for mp_list #include // for mp_rename #include // for mp_bool -#include // for PropertyID, IsRun... +#include +#include #include // for tuple_element #include // for size_t @@ -48,34 +49,10 @@ struct PrependTuple> { using type = std::tuple; }; -// Checks if a type T has a static value member variable. -template struct HasValue : std::false_type {}; -template -struct HasValue : std::true_type {}; - //****************************************************************************** // Property identification //****************************************************************************** -// Checks if a type is a compile-time property values. -// Note: This is specialized for property_value elsewhere. -template -struct IsCompileTimePropertyValue : std::false_type {}; - -// Checks if a type is either a runtime property or if it is a compile-time -// property -template struct IsProperty { - static constexpr bool value = - IsRuntimeProperty::value || IsCompileTimeProperty::value; -}; - -// Checks if a type is a valid property value, i.e either runtime property or -// property_value with a valid compile-time property -template struct IsPropertyValue { - static constexpr bool value = - IsRuntimeProperty::value || IsCompileTimePropertyValue::value; -}; - // Checks that all types in a tuple are valid properties. template struct AllPropertyValues {}; template diff --git a/sycl/include/sycl/ext/oneapi/properties/property_value.hpp b/sycl/include/sycl/ext/oneapi/properties/property_value.hpp index e208f59d85830..dc7d13145677d 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property_value.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property_value.hpp @@ -8,8 +8,7 @@ #pragma once -#include // for IsCompileTi... -#include // for HasValue +#include #include // for enable_if_t @@ -18,6 +17,11 @@ inline namespace _V1 { namespace ext::oneapi::experimental { namespace detail { +// Checks if a type T has a static value member variable. +template struct HasValue : std::false_type {}; +template +struct HasValue : std::true_type {}; + // Base class for property values with a single non-type value template struct SingleNontypePropertyValueBase {}; @@ -80,11 +84,19 @@ template struct PropertyID> : PropertyID {}; -// Specialization of IsCompileTimePropertyValue for property values. +// Checks if a type is a compile-time property values. +template +struct IsCompileTimePropertyValue : std::false_type {}; template struct IsCompileTimePropertyValue> : IsCompileTimeProperty {}; +// Checks if a type is a valid property value, i.e either runtime property or +// property_value with a valid compile-time property +template struct IsPropertyValue { + static constexpr bool value = + IsRuntimeProperty::value || IsCompileTimePropertyValue::value; +}; } // namespace detail } // namespace ext::oneapi::experimental } // namespace _V1 diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index d7141579c0f48..e9555d3ad95d1 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -136,8 +136,8 @@ // CHECK-NEXT: ext/oneapi/device_global/properties.hpp // CHECK-NEXT: ext/oneapi/properties/property.hpp // CHECK-NEXT: ext/oneapi/properties/property_value.hpp -// CHECK-NEXT: ext/oneapi/properties/property_utils.hpp // CHECK-NEXT: ext/oneapi/properties/properties.hpp +// CHECK-NEXT: ext/oneapi/properties/property_utils.hpp // CHECK-NEXT: ext/oneapi/experimental/graph.hpp // CHECK-NEXT: handler.hpp // CHECK-NEXT: detail/cl.h From 9143b2c2334a1abe4eb03b7382918831cd316e27 Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Mon, 28 Oct 2024 20:50:38 -0700 Subject: [PATCH 5/9] [SYCL] Modernize prefetch property_key creation (#15904) Simplified key creation was introduced in https://github.com/intel/llvm/pull/12831 but this one was never updated. I think it happened because this property never defined `PropKind` enum entry and I'm not even sure how it worked or what are scenarios where it was broken. Regardless, fix and unify with all other properties with this PR. --- sycl/include/sycl/ext/oneapi/experimental/prefetch.hpp | 5 ++--- sycl/include/sycl/ext/oneapi/properties/property.hpp | 3 ++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/experimental/prefetch.hpp b/sycl/include/sycl/ext/oneapi/experimental/prefetch.hpp index c5be683830183..441e32a085990 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/prefetch.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/prefetch.hpp @@ -20,7 +20,8 @@ enum class cache_level { L1 = 0, L2 = 1, L3 = 2, L4 = 3 }; struct nontemporal; -struct prefetch_hint_key { +struct prefetch_hint_key + : detail::compile_time_property_key { template using value_t = property_value namespace detail { using namespace sycl::detail; -template <> struct IsCompileTimeProperty : std::true_type {}; - template struct PropertyMetaInfo> { static constexpr const char *name = std::is_same_v diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index d1797b08ce324..351bac8044d98 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -214,8 +214,9 @@ enum PropKind : uint32_t { ResponseCapacity = 73, MaxWorkGroupSize = 74, MaxLinearWorkGroupSize = 75, + Prefetch = 76, // PropKindSize must always be the last value. - PropKindSize = 76, + PropKindSize = 77, }; struct property_key_base_tag {}; From eab2dfcf8255b096cbbc0072abf48d65249c3b81 Mon Sep 17 00:00:00 2001 From: Udit Agarwal Date: Tue, 29 Oct 2024 00:20:55 -0700 Subject: [PATCH 6/9] [Driver] Do not compress bundle when offloading to HIP (#15881) Fixes: https://github.com/intel/llvm/issues/15829 **Problem** `--offload-compress` is being used by HIP in `clang-offload-bundler` and by us in `clang-offload-wrapper`. When we use ` --offload-compress` for SYCL offloading to HIP, the device images gets compressed twice: once in `offload-bundler` and then in `offload-wrapper`. **~Solution~ Workaround** This PR intends to disable compression in `clang-offload-bundler` when offloading to HIP. --- clang/lib/Driver/ToolChains/HIPUtility.cpp | 5 ++++- clang/test/Driver/sycl-offload-wrapper-compression.cpp | 6 ++++++ sycl/test-e2e/Compression/compression.cpp | 3 --- sycl/test-e2e/Compression/compression_multiple_tu.cpp | 3 --- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/clang/lib/Driver/ToolChains/HIPUtility.cpp b/clang/lib/Driver/ToolChains/HIPUtility.cpp index b3adfe65402ff..8f6b305aaf691 100644 --- a/clang/lib/Driver/ToolChains/HIPUtility.cpp +++ b/clang/lib/Driver/ToolChains/HIPUtility.cpp @@ -324,7 +324,10 @@ void HIP::constructHIPFatbinCommand(Compilation &C, const JobAction &JA, Args.MakeArgString(std::string("-output=").append(Output)); BundlerArgs.push_back(BundlerOutputArg); - addOffloadCompressArgs(Args, BundlerArgs); + // For SYCL, the compression is occurring during the wrapping step, so we do + // not want to do additional compression here. + if (!JA.isDeviceOffloading(Action::OFK_SYCL)) + addOffloadCompressArgs(Args, BundlerArgs); const char *Bundler = Args.MakeArgString( T.getToolChain().GetProgramPath("clang-offload-bundler")); diff --git a/clang/test/Driver/sycl-offload-wrapper-compression.cpp b/clang/test/Driver/sycl-offload-wrapper-compression.cpp index 1ef9282ee3598..9e9aa437047d4 100644 --- a/clang/test/Driver/sycl-offload-wrapper-compression.cpp +++ b/clang/test/Driver/sycl-offload-wrapper-compression.cpp @@ -10,5 +10,11 @@ // RUN: %clangxx -### -fsycl %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-COMPRESS // RUN: %clangxx -### -fsycl --offload-compression-level=3 %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-COMPRESS +// For SYCL offloading to HIP, make sure we don't pass '--compress' to offload-bundler. +// RUN: %clangxx -### -fsycl --offload-compress -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=gfx1031 -fsycl-targets=amdgcn-amd-amdhsa -fno-sycl-libspirv -nogpulib %s &> %t.driver +// RUN: FileCheck %s --check-prefix=CHECK-NO-COMPRESS-BUNDLER --input-file=%t.driver + // CHECK-NO-COMPRESS-NOT: {{.*}}clang-offload-wrapper{{.*}}"-offload-compress"{{.*}} // CHECK-NO-COMPRESS-NOT: {{.*}}clang-offload-wrapper{{.*}}"-offload-compression-level=3"{{.*}} + +// CHECK-NO-COMPRESS-BUNDLER-NOT: {{.*}}clang-offload-bundler{{.*}}"-compress"{{.*}} diff --git a/sycl/test-e2e/Compression/compression.cpp b/sycl/test-e2e/Compression/compression.cpp index bd2c8ef558e6f..ffabf1cb07aad 100644 --- a/sycl/test-e2e/Compression/compression.cpp +++ b/sycl/test-e2e/Compression/compression.cpp @@ -1,9 +1,6 @@ // End-to-End test for testing device image compression. // REQUIRES: zstd -// XFAIL: hip_amd -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/15829 - // RUN: %{build} -O0 -g %S/Inputs/single_kernel.cpp -o %t_not_compress.out // RUN: %{build} -O0 -g --offload-compress --offload-compression-level=3 %S/Inputs/single_kernel.cpp -o %t_compress.out // RUN: %{run} %t_not_compress.out diff --git a/sycl/test-e2e/Compression/compression_multiple_tu.cpp b/sycl/test-e2e/Compression/compression_multiple_tu.cpp index a561ec5f342a9..72eb3f0904790 100644 --- a/sycl/test-e2e/Compression/compression_multiple_tu.cpp +++ b/sycl/test-e2e/Compression/compression_multiple_tu.cpp @@ -2,9 +2,6 @@ // translation units, one compressed and one not compressed. // REQUIRES: zstd, linux -// XFAIL: hip_amd -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/15829 - // RUN: %{build} --offload-compress -DENABLE_KERNEL1 -shared -fPIC -o %T/kernel1.so // RUN: %{build} -DENABLE_KERNEL2 -shared -fPIC -o %T/kernel2.so From b7ef830fb490cf5b344035dfc0be7a78a9e6ccd5 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Tue, 29 Oct 2024 00:21:14 -0700 Subject: [PATCH 7/9] [SYCL] Provide a flexible way to compile a single devicelib with specified CFLAGS (#15870) Add an new function in libdevice CMake to provide a way to pass arbitrary compiling flags when building a device library file. This is preparatory work to add AOT device asan library targeting for different GPU/CPU platforms. Signed-off-by: jinge90 --- libdevice/cmake/modules/SYCLLibdevice.cmake | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 8cd5513933c0c..043ffc49e2fac 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -112,13 +112,29 @@ function(compile_lib filename) "FILETYPE" "SRC;EXTRA_OPTS;DEPENDENCIES" ${ARGN}) + set(compile_opt_list ${compile_opts} + ${${ARG_FILETYPE}_device_compile_opts} + ${ARG_EXTRA_OPTS}) + compile_lib_ext(${filename} + FILETYPE ${ARG_FILETYPE} + SRC ${ARG_SRC} + DEPENDENCIES ${ARG_DEPENDENCIES} + OPTS ${compile_opt_list}) +endfunction() + +function(compile_lib_ext filename) + cmake_parse_arguments(ARG + "" + "FILETYPE" + "SRC;OPTS;DEPENDENCIES" + ${ARGN}) set(devicelib-file ${${ARG_FILETYPE}_binary_dir}/${filename}.${${ARG_FILETYPE}-suffix}) add_custom_command( OUTPUT ${devicelib-file} - COMMAND ${clang} ${compile_opts} ${ARG_EXTRA_OPTS} + COMMAND ${clang} ${ARG_OPTS} ${CMAKE_CURRENT_SOURCE_DIR}/${ARG_SRC} -o ${devicelib-file} MAIN_DEPENDENCY ${ARG_SRC} DEPENDS ${ARG_DEPENDENCIES} From 2172d9ee91d5ff70fc3e995c3dfd64c84c3738a7 Mon Sep 17 00:00:00 2001 From: dklochkov-emb Date: Tue, 29 Oct 2024 10:51:21 +0100 Subject: [PATCH 8/9] [SYCL] Deprecate `[[intel::reqd_sub_group_size]]` (#15798) That particular spelling had been introduced as part of an Intel extension to support sub-groups in SYCL 1.2.1 before they became a core feature of SYCL 2020. The extension has been deprecated for a while and no one should use that legacy spelling anymore. The official SYCL 2020 spelling should be used instead (with `sycl::` namespace). --- clang/lib/Sema/SemaSYCL.cpp | 2 +- clang/lib/Sema/SemaSYCLDeclAttr.cpp | 7 +++ clang/test/CodeGenSYCL/kernel-op-calls.cpp | 2 +- .../test/CodeGenSYCL/reqd-sub-group-size.cpp | 8 +-- .../CodeGenSYCL/sycl-multi-kernel-attr.cpp | 4 +- .../SemaSYCL/parallel_for_wrapper_attr.cpp | 2 +- .../test/SemaSYCL/reqd-sub-group-size-ast.cpp | 14 +++--- .../SemaSYCL/reqd-sub-group-size-host.cpp | 4 +- clang/test/SemaSYCL/reqd-sub-group-size.cpp | 49 +++++++++++-------- clang/test/SemaSYCL/sub-group-size.cpp | 26 ++++++---- .../SemaSYCL/sycl-attr-warn-non-kernel.cpp | 4 +- sycl/test-e2e/AOT/reqd-sg-size.cpp | 2 +- .../allocate_barrier_InvokeSimd.cpp | 2 +- .../SYCL2020/group_sort/array_input_sort.cpp | 2 +- .../group_sort/group_and_joint_sort.cpp | 4 +- .../group_sort/key_value_array_input_sort.cpp | 2 +- .../SYCL2020/group_sort/key_value_sort.cpp | 2 +- .../GroupAlgorithm/load_store/odd_wg_size.cpp | 2 +- .../GroupAlgorithm/load_store/partial_sg.cpp | 2 +- .../Feature/SPMD_invoke_ESIMD_external.cpp | 2 +- .../InvokeSimd/Feature/invoke_simd_struct.cpp | 2 +- .../Feature/invoke_simd_struct_by_pointer.cpp | 2 +- sycl/test-e2e/InvokeSimd/Feature/popcnt.cpp | 2 +- .../InvokeSimd/Feature/popcnt_emu.cpp | 2 +- sycl/test-e2e/InvokeSimd/Feature/scale.cpp | 2 +- .../Feature/split_module/SPMD_module.cpp | 2 +- .../InvokeSimd/Feature/void_retval.cpp | 2 +- .../Regression/address_space_cast.cpp | 2 +- .../Regression/call_vadd_1d_loop.cpp | 2 +- .../Regression/call_vadd_1d_loop_naive.cpp | 2 +- .../Regression/call_vadd_1d_spill.cpp | 2 +- .../InvokeSimd/Regression/debug_symbols.cpp | 2 +- sycl/test-e2e/InvokeSimd/Regression/dp4a.cpp | 2 +- .../InvokeSimd/Regression/matrix_add.cpp | 2 +- .../Regression/matrix_multiply_USM.cpp | 2 +- .../matrix_multiply_accessor_get_pointer.cpp | 2 +- .../InvokeSimd/Regression/nbarrier_basic.cpp | 2 +- .../Regression/nbarrier_exec_in_order.cpp | 2 +- .../InvokeSimd/Regression/nbarrier_loop.cpp | 2 +- .../Regression/nbarrier_multiple_wg.cpp | 2 +- .../Regression/slm_gather_scatter.cpp | 2 +- .../InvokeSimd/Regression/slm_load_store.cpp | 2 +- .../tiled_matrix_multiplication.cpp | 2 +- .../Spec/ESIMD_to_unmarked_function.cpp | 2 +- .../clang_run_error/reference_argument.cpp | 2 +- .../InvokeSimd/Spec/function_overloads.cpp | 2 +- .../Spec/multiple_SPMD_to_multiple_ESIMD.cpp | 2 +- .../Spec/multiple_SPMD_to_single_ESIMD.cpp | 2 +- .../InvokeSimd/Spec/nested_ESIMD_to_ESIMD.cpp | 2 +- .../InvokeSimd/Spec/nested_SPMD_to_ESIMD.cpp | 2 +- sycl/test-e2e/InvokeSimd/Spec/simd_mask.cpp | 2 +- .../Spec/simd_size/Inputs/common.hpp | 2 +- sycl/test-e2e/InvokeSimd/Spec/tuple.cpp | 2 +- .../test-e2e/InvokeSimd/Spec/tuple_return.cpp | 2 +- sycl/test-e2e/InvokeSimd/Spec/tuple_vadd.cpp | 2 +- .../InvokeSimd/Spec/uniform_retval.cpp | 2 +- sycl/test-e2e/InvokeSimd/invoke_simd_conv.cpp | 2 +- .../test-e2e/InvokeSimd/invoke_simd_smoke.cpp | 2 +- .../test-e2e/Matrix/element_wise_abc_impl.hpp | 2 +- .../Matrix/element_wise_all_ops_half_impl.hpp | 2 +- .../Matrix/element_wise_all_ops_impl.hpp | 4 +- .../Matrix/element_wise_all_ops_int8_impl.hpp | 2 +- .../element_wise_all_ops_int8_packed_impl.hpp | 2 +- .../Matrix/element_wise_all_ops_tf32_impl.hpp | 2 +- .../Matrix/element_wise_all_sizes_impl.hpp | 2 +- .../test-e2e/Matrix/element_wise_ops_impl.hpp | 2 +- .../Matrix/elemwise_irreg_size_ops_bf16.cpp | 2 +- .../Matrix/get_coord_float_matC_impl.hpp | 2 +- .../Matrix/get_coord_int8_matA_impl.hpp | 2 +- .../Matrix/get_coord_int8_matB_impl.hpp | 2 +- .../Matrix/joint_matrix_all_sizes_impl.hpp | 2 +- .../joint_matrix_annotated_ptr_impl.hpp | 2 +- .../Matrix/joint_matrix_apply_bf16_impl.hpp | 2 +- .../joint_matrix_apply_two_matrices_impl.hpp | 2 +- .../joint_matrix_bf16_fill_k_cache_impl.hpp | 2 +- ..._matrix_bf16_rowmajorB_load_store_impl.hpp | 2 +- ...ix_bf16_rowmajorB_pair_load_store_impl.hpp | 2 +- .../joint_matrix_bfloat16_array_impl.hpp | 2 +- ...trix_bfloat16_colmajorA_colmajorB_impl.hpp | 2 +- .../Matrix/joint_matrix_bfloat16_impl.hpp | 2 +- .../joint_matrix_bfloat16_packedB_impl.hpp | 2 +- .../joint_matrix_colA_rowB_colC_impl.hpp | 2 +- .../Matrix/joint_matrix_down_convert_impl.hpp | 2 +- .../Matrix/joint_matrix_half_impl.hpp | 2 +- ...t_matrix_int8_colmajorA_colmajorB_impl.hpp | 2 +- ...t_matrix_int8_rowmajorA_rowmajorB_impl.hpp | 2 +- .../joint_matrix_opt_kernel_feature_impl.hpp | 2 +- .../Matrix/joint_matrix_out_bounds_impl.hpp | 2 +- .../Matrix/joint_matrix_prefetch_impl.hpp | 2 +- .../Matrix/joint_matrix_query_default.cpp | 2 +- .../joint_matrix_rowmajorA_rowmajorB_impl.hpp | 2 +- .../Matrix/joint_matrix_ss_int8_impl.hpp | 2 +- .../Matrix/joint_matrix_su_int8_impl.hpp | 2 +- .../Matrix/joint_matrix_tf32_impl.hpp | 2 +- .../Matrix/joint_matrix_transposeC_impl.hpp | 2 +- .../Matrix/joint_matrix_us_int8_impl.hpp | 2 +- .../Matrix/joint_matrix_uu_int8_impl.hpp | 2 +- sycl/test-e2e/SubGroupMask/Basic.cpp | 2 +- sycl/test-e2e/SubGroupMask/GroupSize.cpp | 4 +- .../syclcompat/util/util_logical_group.cpp | 2 +- .../util/util_match_all_over_group.cpp | 2 +- .../util/util_match_any_over_group.cpp | 2 +- .../util/util_permute_sub_group_by_xor.cpp | 4 +- .../util/util_select_from_sub_group.cpp | 4 +- .../util/util_shift_sub_group_left.cpp | 4 +- .../util/util_shift_sub_group_right.cpp | 4 +- .../matrix/matrix-int8-test.cpp | 2 +- .../matrix/matrix_load_store_as.cpp | 2 +- sycl/test/esimd/slm_init_invoke_simd.cpp | 2 +- sycl/test/extensions/inline_asm.cpp | 12 ++--- sycl/test/invoke_simd/invoke_simd.cpp | 2 +- .../invoke_simd_address_space_inferral.cpp | 2 +- .../matrix-bfloat16-test-coord-basicB.cpp | 2 +- sycl/test/matrix/matrix-tf32-test.cpp | 2 +- sycl/test/warnings/warnings.cpp | 3 +- 115 files changed, 188 insertions(+), 167 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index d8925669b1ae3..996443bcbc8fd 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -3167,7 +3167,7 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { // // code // } // -// [[intel::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const +// [[sycl::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const // { // // code // } diff --git a/clang/lib/Sema/SemaSYCLDeclAttr.cpp b/clang/lib/Sema/SemaSYCLDeclAttr.cpp index 0e23934b4597f..db0a2fdce4aad 100644 --- a/clang/lib/Sema/SemaSYCLDeclAttr.cpp +++ b/clang/lib/Sema/SemaSYCLDeclAttr.cpp @@ -132,6 +132,13 @@ void SemaSYCL::checkDeprecatedSYCLAttributeSpelling(const ParsedAttr &A, return; } + // Additionally, diagnose deprecated [[intel::reqd_sub_group_size]] spelling + if (A.getKind() == ParsedAttr::AT_IntelReqdSubGroupSize && A.getScopeName() && + A.getScopeName()->isStr("intel")) { + diagnoseDeprecatedAttribute(A, "sycl", "reqd_sub_group_size"); + return; + } + // Diagnose SYCL 2020 spellings in later SYCL modes. if (getLangOpts().getSYCLVersion() >= LangOptions::SYCL_2020) { // All attributes in the cl vendor namespace are deprecated in favor of a diff --git a/clang/test/CodeGenSYCL/kernel-op-calls.cpp b/clang/test/CodeGenSYCL/kernel-op-calls.cpp index a8f17d8235a50..0c3a53586b46c 100644 --- a/clang/test/CodeGenSYCL/kernel-op-calls.cpp +++ b/clang/test/CodeGenSYCL/kernel-op-calls.cpp @@ -11,7 +11,7 @@ class Functor1 { public: Functor1(){} - [[intel::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const {} + [[sycl::reqd_sub_group_size(4)]] void operator()(sycl::id<1> id) const {} [[sycl::work_group_size_hint(1, 2, 3)]] void operator()(sycl::id<2> id) const {} diff --git a/clang/test/CodeGenSYCL/reqd-sub-group-size.cpp b/clang/test/CodeGenSYCL/reqd-sub-group-size.cpp index 4396313eac485..cdbc5158535cc 100644 --- a/clang/test/CodeGenSYCL/reqd-sub-group-size.cpp +++ b/clang/test/CodeGenSYCL/reqd-sub-group-size.cpp @@ -7,17 +7,17 @@ queue q; class Functor16 { public: - [[intel::reqd_sub_group_size(16)]] void operator()() const {} + [[sycl::reqd_sub_group_size(16)]] void operator()() const {} }; template class Functor2 { public: - [[intel::reqd_sub_group_size(SIZE)]] void operator()() const {} + [[sycl::reqd_sub_group_size(SIZE)]] void operator()() const {} }; template -[[intel::reqd_sub_group_size(N)]] void func() {} +[[sycl::reqd_sub_group_size(N)]] void func() {} int main() { q.submit([&](handler &h) { @@ -25,7 +25,7 @@ int main() { h.single_task(f16); h.single_task( - []() [[intel::reqd_sub_group_size(4)]]{}); + []() [[sycl::reqd_sub_group_size(4)]]{}); Functor2<2> f2; h.single_task(f2); diff --git a/clang/test/CodeGenSYCL/sycl-multi-kernel-attr.cpp b/clang/test/CodeGenSYCL/sycl-multi-kernel-attr.cpp index 45f258a01b67e..cde7f32ebe068 100644 --- a/clang/test/CodeGenSYCL/sycl-multi-kernel-attr.cpp +++ b/clang/test/CodeGenSYCL/sycl-multi-kernel-attr.cpp @@ -7,12 +7,12 @@ queue q; class Functor { public: - [[intel::reqd_sub_group_size(4), cl::reqd_work_group_size(32, 16, 16)]] void operator()() const {} + [[sycl::reqd_sub_group_size(4), cl::reqd_work_group_size(32, 16, 16)]] void operator()() const {} }; class Functor1 { public: - [[intel::reqd_sub_group_size(2), sycl::reqd_work_group_size(64, 32, 32)]] void operator()() const {} + [[sycl::reqd_sub_group_size(2), sycl::reqd_work_group_size(64, 32, 32)]] void operator()() const {} }; template diff --git a/clang/test/SemaSYCL/parallel_for_wrapper_attr.cpp b/clang/test/SemaSYCL/parallel_for_wrapper_attr.cpp index c4da05dfbf234..c38b40e183edc 100644 --- a/clang/test/SemaSYCL/parallel_for_wrapper_attr.cpp +++ b/clang/test/SemaSYCL/parallel_for_wrapper_attr.cpp @@ -6,7 +6,7 @@ template class Fobj { public: Fobj() {} void operator()() const { - auto L0 = []() [[intel::reqd_sub_group_size(4)]]{}; + auto L0 = []() [[sycl::reqd_sub_group_size(4)]]{}; L0(); } }; diff --git a/clang/test/SemaSYCL/reqd-sub-group-size-ast.cpp b/clang/test/SemaSYCL/reqd-sub-group-size-ast.cpp index 8386663bbd37a..b21ca48807622 100644 --- a/clang/test/SemaSYCL/reqd-sub-group-size-ast.cpp +++ b/clang/test/SemaSYCL/reqd-sub-group-size-ast.cpp @@ -9,14 +9,14 @@ queue q; class Functor16 { public: - [[intel::reqd_sub_group_size(16)]] void operator()() const {} + [[sycl::reqd_sub_group_size(16)]] void operator()() const {} }; // Test that checks template parameter support on member function of class template. template class KernelFunctor { public: - [[intel::reqd_sub_group_size(SIZE)]] void operator()() const {} + [[sycl::reqd_sub_group_size(SIZE)]] void operator()() const {} }; // Test that checks template parameter support on function. @@ -35,7 +35,7 @@ class KernelFunctor { // CHECK-NEXT: NonTypeTemplateParmDecl // CHECK-NEXT: IntegerLiteral {{.*}} 'int' 12 template -[[intel::reqd_sub_group_size(N)]] void func() {} +[[sycl::reqd_sub_group_size(N)]] void func() {} int main() { q.submit([&](handler &h) { @@ -52,14 +52,14 @@ int main() { // CHECK-NEXT: ConstantExpr {{.*}} 'int' // CHECK-NEXT: value: Int 2 // CHECK-NEXT: IntegerLiteral{{.*}}2{{$}} - h.single_task([]() [[intel::reqd_sub_group_size(2)]] {}); + h.single_task([]() [[sycl::reqd_sub_group_size(2)]] {}); // CHECK: FunctionDecl {{.*}} {{.*}}kernel_name5 // CHECK: IntelReqdSubGroupSizeAttr {{.*}} reqd_sub_group_size // CHECK-NEXT: ConstantExpr {{.*}} 'int' // CHECK-NEXT: value: Int 6 // CHECK-NEXT: IntegerLiteral{{.*}}6{{$}} - h.single_task([]() [[intel::reqd_sub_group_size(6)]] {}); + h.single_task([]() [[sycl::reqd_sub_group_size(6)]] {}); // CHECK: FunctionDecl {{.*}}kernel_name_6 // CHECK: IntelReqdSubGroupSizeAttr {{.*}} reqd_sub_group_size @@ -79,8 +79,8 @@ int main() { // CHECK-NEXT: value: Int 8 // CHECK-NEXT: IntegerLiteral{{.*}}8{{$}} // CHECK-NOT: IntelReqdSubGroupSizeAttr - []() [[intel::reqd_sub_group_size(8), - intel::reqd_sub_group_size(8)]] {}); + []() [[sycl::reqd_sub_group_size(8), + sycl::reqd_sub_group_size(8)]] {}); }); func<12>(); return 0; diff --git a/clang/test/SemaSYCL/reqd-sub-group-size-host.cpp b/clang/test/SemaSYCL/reqd-sub-group-size-host.cpp index ab11ef9a19ce9..0acbda529e3be 100644 --- a/clang/test/SemaSYCL/reqd-sub-group-size-host.cpp +++ b/clang/test/SemaSYCL/reqd-sub-group-size-host.cpp @@ -1,9 +1,9 @@ // RUN: %clang_cc1 -fsycl-is-host -fsyntax-only -verify %s // expected-no-diagnostics -[[intel::reqd_sub_group_size(8)]] void fun() {} +[[sycl::reqd_sub_group_size(8)]] void fun() {} class Functor { public: - [[intel::reqd_sub_group_size(16)]] void operator()() {} + [[sycl::reqd_sub_group_size(16)]] void operator()() {} }; diff --git a/clang/test/SemaSYCL/reqd-sub-group-size.cpp b/clang/test/SemaSYCL/reqd-sub-group-size.cpp index e86b68db49cfa..b44892bf44176 100644 --- a/clang/test/SemaSYCL/reqd-sub-group-size.cpp +++ b/clang/test/SemaSYCL/reqd-sub-group-size.cpp @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fsyntax-only -verify -pedantic %s -// The test checks functionality of [[intel::reqd_sub_group_size()]] attribute on SYCL kernel. +// The test checks functionality of [[sycl::reqd_sub_group_size()]] attribute on SYCL kernel and [[intel::reqd_sub_group_size()]] is deprecated. #include "sycl.hpp" //clang/test/SemaSYCL/Inputs/sycl.hpp @@ -32,44 +32,51 @@ int main() { }); return 0; } -[[intel::reqd_sub_group_size(16)]] SYCL_EXTERNAL void B(); -[[intel::reqd_sub_group_size(16)]] void A() // expected-warning {{'reqd_sub_group_size' attribute can only be applied to a SYCL kernel function}} + +[[sycl::reqd_sub_group_size(16)]] SYCL_EXTERNAL void B(); +[[sycl::reqd_sub_group_size(16)]] void A() // expected-warning {{'reqd_sub_group_size' attribute can only be applied to a SYCL kernel function}} { } -[[intel::reqd_sub_group_size(16)]] SYCL_EXTERNAL void B() { // expected-warning {{'reqd_sub_group_size' attribute can only be applied to a SYCL kernel function}} +[[sycl::reqd_sub_group_size(16)]] SYCL_EXTERNAL void B(); +[[sycl::reqd_sub_group_size(16)]] SYCL_EXTERNAL void B() { // expected-warning {{'reqd_sub_group_size' attribute can only be applied to a SYCL kernel function}} A(); } + // expected-note@+1 {{conflicting attribute is here}} -[[intel::reqd_sub_group_size(2)]] void sg_size2() {} // expected-warning {{'reqd_sub_group_size' attribute can only be applied to a SYCL kernel function}} +[[sycl::reqd_sub_group_size(2)]] void sg_size2() {} // expected-warning {{'reqd_sub_group_size' attribute can only be applied to a SYCL kernel function}} // expected-note@+3 {{conflicting attribute is here}} // expected-error@+2 {{conflicting attributes applied to a SYCL kernel}} // expected-warning@+1 {{'reqd_sub_group_size' attribute can only be applied to a SYCL kernel function}} -[[intel::reqd_sub_group_size(4)]] __attribute__((sycl_device)) void sg_size4() { +[[sycl::reqd_sub_group_size(4)]] __attribute__((sycl_device)) void sg_size4() { sg_size2(); } // Test that checks support and functionality of reqd_sub_group_size attribute support on function. // Tests for incorrect argument values for Intel reqd_sub_group_size attribute. -[[intel::reqd_sub_group_size]] void one() {} // expected-error {{'reqd_sub_group_size' attribute takes one argument}} -[[intel::reqd_sub_group_size(5)]] int a; // expected-error{{'reqd_sub_group_size' attribute only applies to functions}} -[[intel::reqd_sub_group_size("foo")]] void func() {} // expected-error{{integral constant expression must have integral or unscoped enumeration type, not 'const char[4]'}} -[[intel::reqd_sub_group_size(-1)]] void func1() {} // expected-error{{'reqd_sub_group_size' attribute requires a positive integral compile time constant expression}} -[[intel::reqd_sub_group_size(0, 1)]] void arg() {} // expected-error{{'reqd_sub_group_size' attribute takes one argument}} +[[sycl::reqd_sub_group_size]] void one() {} // expected-error {{'reqd_sub_group_size' attribute takes one argument}} +[[sycl::reqd_sub_group_size(5)]] int a; // expected-error{{'reqd_sub_group_size' attribute only applies to functions}} +[[sycl::reqd_sub_group_size("foo")]] void func() {} // expected-error{{integral constant expression must have integral or unscoped enumeration type, not 'const char[4]'}} +[[sycl::reqd_sub_group_size(-1)]] void func1() {} // expected-error{{'reqd_sub_group_size' attribute requires a positive integral compile time constant expression}} +[[sycl::reqd_sub_group_size(0, 1)]] void arg() {} // expected-error{{'reqd_sub_group_size' attribute takes one argument}} // Diagnostic is emitted because the arguments mismatch. -[[intel::reqd_sub_group_size(12)]] void quux(); // expected-note {{previous attribute is here}} -[[intel::reqd_sub_group_size(100)]] void quux(); // expected-warning {{attribute 'reqd_sub_group_size' is already applied with different arguments}} expected-note {{previous attribute is here}} +[[sycl::reqd_sub_group_size(12)]] void quux(); // expected-note {{previous attribute is here}} +[[sycl::reqd_sub_group_size(100)]] void quux(); // expected-warning {{attribute 'reqd_sub_group_size' is already applied with different arguments}} expected-note {{previous attribute is here}} [[sycl::reqd_sub_group_size(200)]] void quux(); // expected-warning {{attribute 'reqd_sub_group_size' is already applied with different arguments}} // Make sure there's at least one argument passed. [[sycl::reqd_sub_group_size]] void quibble(); // expected-error {{'reqd_sub_group_size' attribute takes one argument}} // No diagnostic is emitted because the arguments match. +[[sycl::reqd_sub_group_size(12)]] void same(); +[[sycl::reqd_sub_group_size(12)]] void same() {} // expected-warning {{'reqd_sub_group_size' attribute can only be applied to a SYCL kernel function}} + +// expected-note@+2 {{did you mean to use 'sycl::reqd_sub_group_size' instead?}} +// expected-warning@+1{{attribute 'intel::reqd_sub_group_size' is deprecated}} [[intel::reqd_sub_group_size(12)]] void same(); -[[intel::reqd_sub_group_size(12)]] void same() {} // expected-warning {{'reqd_sub_group_size' attribute can only be applied to a SYCL kernel function}} // No diagnostic because the attributes are synonyms with identical behavior. [[sycl::reqd_sub_group_size(12)]] void same(); // OK @@ -80,7 +87,7 @@ template // expected-error@+3{{'reqd_sub_group_size' attribute requires a positive integral compile time constant expression}} // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'S'}} // expected-error@+1 {{integral constant expression must have integral or unscoped enumeration type, not 'float'}} -[[intel::reqd_sub_group_size(Ty{})]] void func() {} +[[sycl::reqd_sub_group_size(Ty{})]] void func() {} struct S {}; void test() { @@ -97,18 +104,18 @@ void test() { int foo1(); // expected-error@+2{{expression is not an integral constant expression}} // expected-note@+1{{non-constexpr function 'foo1' cannot be used in a constant expression}} -[[intel::reqd_sub_group_size(foo1() + 12)]] void func1(); +[[sycl::reqd_sub_group_size(foo1() + 12)]] void func1(); // Test that checks expression is a constant expression. constexpr int bar1() { return 0; } -[[intel::reqd_sub_group_size(bar1() + 12)]] void func2(); // OK +[[sycl::reqd_sub_group_size(bar1() + 12)]] void func2(); // OK // Test that checks template parameter support on member function of class template. template class KernelFunctor { public: // expected-error@+1{{'reqd_sub_group_size' attribute requires a positive integral compile time constant expression}} - [[intel::reqd_sub_group_size(SIZE)]] void operator()() {} + [[sycl::reqd_sub_group_size(SIZE)]] void operator()() {} }; int check() { @@ -121,14 +128,14 @@ int check() { template // expected-error@+2{{'reqd_sub_group_size' attribute requires a positive integral compile time constant expression}} // expected-warning@+1 {{'reqd_sub_group_size' attribute can only be applied to a SYCL kernel function}} -[[intel::reqd_sub_group_size(N)]] void func3() {} +[[sycl::reqd_sub_group_size(N)]] void func3() {} template // expected-warning@+1 {{'reqd_sub_group_size' attribute can only be applied to a SYCL kernel function}} -[[intel::reqd_sub_group_size(4)]] void func4(); // expected-note {{previous attribute is here}} +[[sycl::reqd_sub_group_size(4)]] void func4(); // expected-note {{previous attribute is here}} template -[[intel::reqd_sub_group_size(N)]] void func4() {} // expected-warning {{attribute 'reqd_sub_group_size' is already applied with different arguments}} +[[sycl::reqd_sub_group_size(N)]] void func4() {} // expected-warning {{attribute 'reqd_sub_group_size' is already applied with different arguments}} int check1() { // no error expected diff --git a/clang/test/SemaSYCL/sub-group-size.cpp b/clang/test/SemaSYCL/sub-group-size.cpp index aa85dec244fb0..65f821c1026d8 100644 --- a/clang/test/SemaSYCL/sub-group-size.cpp +++ b/clang/test/SemaSYCL/sub-group-size.cpp @@ -3,29 +3,29 @@ // RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -fsycl-default-sub-group-size=10 -sycl-std=2020 -internal-isystem %S/Inputs -fsyntax-only -verify=expected,primary %s // Validate the semantic analysis checks for the interaction betwen the -// named_sub_group_size and sub_group_size attributes. These are not able to be +// named_sub_group_size and reqd_sub_group_size attributes. These are not able to be // combined, and require that they only be applied to non-sycl-kernel/ // non-sycl-device functions if they match the kernel they are being called // from. #include "Inputs/sycl.hpp" -// expected-error@+2 {{'named_sub_group_size' and 'sub_group_size' attributes are not compatible}} +// expected-error@+2 {{'named_sub_group_size' and 'reqd_sub_group_size' attributes are not compatible}} // expected-note@+1 {{conflicting attribute is here}} -[[intel::sub_group_size(1)]] [[intel::named_sub_group_size(automatic)]] void f1(); -// expected-error@+2 {{'sub_group_size' and 'named_sub_group_size' attributes are not compatible}} +[[sycl::reqd_sub_group_size(1)]] [[intel::named_sub_group_size(automatic)]] void f1(); +// expected-error@+2 {{'reqd_sub_group_size' and 'named_sub_group_size' attributes are not compatible}} // expected-note@+1 {{conflicting attribute is here}} -[[intel::named_sub_group_size(primary)]] [[intel::sub_group_size(1)]] void f2(); +[[intel::named_sub_group_size(primary)]] [[sycl::reqd_sub_group_size(1)]] void f2(); // expected-note@+1 {{conflicting attribute is here}} -[[intel::sub_group_size(1)]] void f3(); -// expected-error@+1 {{'named_sub_group_size' and 'sub_group_size' attributes are not compatible}} +[[sycl::reqd_sub_group_size(1)]] void f3(); +// expected-error@+1 {{'named_sub_group_size' and 'reqd_sub_group_size' attributes are not compatible}} [[intel::named_sub_group_size(primary)]] void f3(); // expected-note@+1 {{conflicting attribute is here}} [[intel::named_sub_group_size(primary)]] void f4(); -// expected-error@+1 {{'sub_group_size' and 'named_sub_group_size' attributes are not compatible}} -[[intel::sub_group_size(1)]] void f4(); +// expected-error@+1 {{'reqd_sub_group_size' and 'named_sub_group_size' attributes are not compatible}} +[[sycl::reqd_sub_group_size(1)]] void f4(); // expected-note@+1 {{previous attribute is here}} [[intel::named_sub_group_size(automatic)]] void f5(); @@ -115,8 +115,14 @@ void calls_kernel_3() { }); } +// expected-note@+2 {{did you mean to use 'sycl::reqd_sub_group_size' instead?}} +// expected-warning@+1{{attribute 'intel::sub_group_size' is deprecated}} [[intel::sub_group_size(10)]] void AttrFunc2() {} // #AttrFunc2 +// expected-note@+2 {{did you mean to use 'sycl::reqd_sub_group_size' instead?}} +// expected-warning@+1{{attribute 'intel::sub_group_size' is deprecated}} [[intel::sub_group_size(10)]] SYCL_EXTERNAL void AttrExternalDefined2() {} // #AttrExternalDefined2 +// expected-note@+2 {{did you mean to use 'sycl::reqd_sub_group_size' instead?}} +// expected-warning@+1{{attribute 'intel::sub_group_size' is deprecated}} [[intel::sub_group_size(10)]] SYCL_EXTERNAL void AttrExternalNotDefined2(); // #AttrExternalNotDefined2 void calls_kernel_4() { @@ -153,7 +159,7 @@ void calls_kernel_5() { // Don't diag with the old sub-group-size. void calls_kernel_6() { - sycl::kernel_single_task([]() [[intel::reqd_sub_group_size(10)]] { // #Kernel6 + sycl::kernel_single_task([]() [[sycl::reqd_sub_group_size(10)]] { // #Kernel6 NoAttrExternalNotDefined(); }); } diff --git a/clang/test/SemaSYCL/sycl-attr-warn-non-kernel.cpp b/clang/test/SemaSYCL/sycl-attr-warn-non-kernel.cpp index c4ff35d6e02db..8115df801552e 100644 --- a/clang/test/SemaSYCL/sycl-attr-warn-non-kernel.cpp +++ b/clang/test/SemaSYCL/sycl-attr-warn-non-kernel.cpp @@ -7,7 +7,7 @@ [[sycl::reqd_work_group_size(16)]] void f1(){ // expected-warning {{'reqd_work_group_size' attribute can only be applied to a SYCL kernel function}} } -[[intel::reqd_sub_group_size(12)]] void f3(){ // expected-warning {{'reqd_sub_group_size' attribute can only be applied to a SYCL kernel function}} +[[sycl::reqd_sub_group_size(12)]] void f3(){ // expected-warning {{'reqd_sub_group_size' attribute can only be applied to a SYCL kernel function}} } [[sycl::reqd_work_group_size(16)]] void f4(){ // expected-warning {{'reqd_work_group_size' attribute can only be applied to a SYCL kernel function}} @@ -41,7 +41,7 @@ class Functor16x16x16 { class FunctorSubGroupSize4 { public: - [[intel::reqd_sub_group_size(4)]] void operator()() const{} // expected-warning {{'reqd_sub_group_size' attribute can only be applied to a SYCL kernel function}} + [[sycl::reqd_sub_group_size(4)]] void operator()() const{} // expected-warning {{'reqd_sub_group_size' attribute can only be applied to a SYCL kernel function}} }; class Functor8 { diff --git a/sycl/test-e2e/AOT/reqd-sg-size.cpp b/sycl/test-e2e/AOT/reqd-sg-size.cpp index f7c7884d7ea71..c2ac12de707cf 100644 --- a/sycl/test-e2e/AOT/reqd-sg-size.cpp +++ b/sycl/test-e2e/AOT/reqd-sg-size.cpp @@ -42,7 +42,7 @@ template struct SubgroupDispatcher { accessor acc{buf, cgh}; cgh.parallel_for>( nd_range<1>(1, 1), - [=](auto item) [[intel::reqd_sub_group_size(size)]] { + [=](auto item) [[sycl::reqd_sub_group_size(size)]] { acc[0] = item.get_sub_group().get_max_local_range()[0]; }); }); diff --git a/sycl/test-e2e/ESIMD/named_barriers/allocate_barrier_InvokeSimd.cpp b/sycl/test-e2e/ESIMD/named_barriers/allocate_barrier_InvokeSimd.cpp index f734ae4e696bb..3f2a8818eaa40 100644 --- a/sycl/test-e2e/ESIMD/named_barriers/allocate_barrier_InvokeSimd.cpp +++ b/sycl/test-e2e/ESIMD/named_barriers/allocate_barrier_InvokeSimd.cpp @@ -32,7 +32,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl; diff --git a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/array_input_sort.cpp b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/array_input_sort.cpp index 50addd2898e6b..3adfc92ccf256 100644 --- a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/array_input_sort.cpp +++ b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/array_input_sort.cpp @@ -78,7 +78,7 @@ void RunSortOverGroupArray(sycl::queue &Q, const std::vector &DataToSort, CGH); CGH.parallel_for( - NDRange, [=](sycl::nd_item id) [[intel::reqd_sub_group_size( + NDRange, [=](sycl::nd_item id) [[sycl::reqd_sub_group_size( ReqSubGroupSize)]] { const size_t GlobalLinearID = id.get_global_linear_id(); using RadixSorterT = oneapi_exp::radix_sorters::group_sorter< diff --git a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/group_and_joint_sort.cpp b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/group_and_joint_sort.cpp index 985a9fea71133..a5ca0006b79ed 100644 --- a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/group_and_joint_sort.cpp +++ b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/group_and_joint_sort.cpp @@ -115,7 +115,7 @@ void RunJointSort(sycl::queue &Q, const std::vector &DataToSort, CGH.parallel_for, UseGroupWrapper, T, Compare>>( - NDRange, [=](sycl::nd_item ID) [[intel::reqd_sub_group_size( + NDRange, [=](sycl::nd_item ID) [[sycl::reqd_sub_group_size( ReqSubGroupSize)]] { auto Group = [&]() { if constexpr (UseGroup == UseGroupT::SubGroup) @@ -282,7 +282,7 @@ void RunSortOVerGroup(sycl::queue &Q, const std::vector &DataToSort, CGH.parallel_for, UseGroupWrapper, T, Compare>>( - NDRange, [=](sycl::nd_item id) [[intel::reqd_sub_group_size( + NDRange, [=](sycl::nd_item id) [[sycl::reqd_sub_group_size( ReqSubGroupSize)]] { const size_t GlobalLinearID = id.get_global_linear_id(); diff --git a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_array_input_sort.cpp b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_array_input_sort.cpp index bcb5b3cbd5aa6..0b415f878e85f 100644 --- a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_array_input_sort.cpp +++ b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_array_input_sort.cpp @@ -94,7 +94,7 @@ void RunSortKeyValueOverGroupArray(sycl::queue &Q, CGH); CGH.parallel_for(NDRange, [=](sycl::nd_item - id) [[intel::reqd_sub_group_size( + id) [[sycl::reqd_sub_group_size( ReqSubGroupSize)]] { const size_t GlobalLinearID = id.get_global_linear_id(); diff --git a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_sort.cpp b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_sort.cpp index 95b1e07445150..b2347d9b6de6e 100644 --- a/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_sort.cpp +++ b/sycl/test-e2e/GroupAlgorithm/SYCL2020/group_sort/key_value_sort.cpp @@ -120,7 +120,7 @@ void RunSortKeyValueOverGroup(sycl::queue &Q, CGH); auto KeyValueSortKernel = - [=](sycl::nd_item id) [[intel::reqd_sub_group_size( + [=](sycl::nd_item id) [[sycl::reqd_sub_group_size( ReqSubGroupSize)]] { const size_t GlobalLinearID = id.get_global_linear_id(); diff --git a/sycl/test-e2e/GroupAlgorithm/load_store/odd_wg_size.cpp b/sycl/test-e2e/GroupAlgorithm/load_store/odd_wg_size.cpp index adce5e9f588bc..c778631ccc05f 100644 --- a/sycl/test-e2e/GroupAlgorithm/load_store/odd_wg_size.cpp +++ b/sycl/test-e2e/GroupAlgorithm/load_store/odd_wg_size.cpp @@ -41,7 +41,7 @@ template void test(queue &q) { cgh.parallel_for( nd_range<1>{global_size, wg_size}, - [=](nd_item<1> ndi) [[intel::reqd_sub_group_size(SG_SIZE)]] { + [=](nd_item<1> ndi) [[sycl::reqd_sub_group_size(SG_SIZE)]] { auto gid = ndi.get_global_id(0); auto g = ndi.get_group(); auto offset = g.get_group_id(0) * g.get_local_range(0) * elems_per_wi; diff --git a/sycl/test-e2e/GroupAlgorithm/load_store/partial_sg.cpp b/sycl/test-e2e/GroupAlgorithm/load_store/partial_sg.cpp index 72b87364f0ee9..d01fbf4e6b7ae 100644 --- a/sycl/test-e2e/GroupAlgorithm/load_store/partial_sg.cpp +++ b/sycl/test-e2e/GroupAlgorithm/load_store/partial_sg.cpp @@ -37,7 +37,7 @@ template void test(queue &q) { cgh.parallel_for( nd_range<1>{wg_size, wg_size}, - [=](nd_item<1> ndi) [[intel::reqd_sub_group_size(SG_SIZE)]] { + [=](nd_item<1> ndi) [[sycl::reqd_sub_group_size(SG_SIZE)]] { auto gid = ndi.get_global_id(0); auto sg = ndi.get_sub_group(); auto offset = diff --git a/sycl/test-e2e/InvokeSimd/Feature/SPMD_invoke_ESIMD_external.cpp b/sycl/test-e2e/InvokeSimd/Feature/SPMD_invoke_ESIMD_external.cpp index 648df2f5c5489..906e2fa46114e 100644 --- a/sycl/test-e2e/InvokeSimd/Feature/SPMD_invoke_ESIMD_external.cpp +++ b/sycl/test-e2e/InvokeSimd/Feature/SPMD_invoke_ESIMD_external.cpp @@ -25,7 +25,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Feature/invoke_simd_struct.cpp b/sycl/test-e2e/InvokeSimd/Feature/invoke_simd_struct.cpp index 9c5b92645c9d7..75bd87d74d0f3 100644 --- a/sycl/test-e2e/InvokeSimd/Feature/invoke_simd_struct.cpp +++ b/sycl/test-e2e/InvokeSimd/Feature/invoke_simd_struct.cpp @@ -32,7 +32,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Feature/invoke_simd_struct_by_pointer.cpp b/sycl/test-e2e/InvokeSimd/Feature/invoke_simd_struct_by_pointer.cpp index 27709232eaac1..d137545d96295 100644 --- a/sycl/test-e2e/InvokeSimd/Feature/invoke_simd_struct_by_pointer.cpp +++ b/sycl/test-e2e/InvokeSimd/Feature/invoke_simd_struct_by_pointer.cpp @@ -34,7 +34,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Feature/popcnt.cpp b/sycl/test-e2e/InvokeSimd/Feature/popcnt.cpp index c39983bba4ebe..f99c6fd0e097b 100644 --- a/sycl/test-e2e/InvokeSimd/Feature/popcnt.cpp +++ b/sycl/test-e2e/InvokeSimd/Feature/popcnt.cpp @@ -48,7 +48,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Feature/popcnt_emu.cpp b/sycl/test-e2e/InvokeSimd/Feature/popcnt_emu.cpp index 1301ccd0f0ce1..85ab93d7d43a9 100644 --- a/sycl/test-e2e/InvokeSimd/Feature/popcnt_emu.cpp +++ b/sycl/test-e2e/InvokeSimd/Feature/popcnt_emu.cpp @@ -49,7 +49,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Feature/scale.cpp b/sycl/test-e2e/InvokeSimd/Feature/scale.cpp index e78871769aa41..5e4af520b43c3 100644 --- a/sycl/test-e2e/InvokeSimd/Feature/scale.cpp +++ b/sycl/test-e2e/InvokeSimd/Feature/scale.cpp @@ -36,7 +36,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Feature/split_module/SPMD_module.cpp b/sycl/test-e2e/InvokeSimd/Feature/split_module/SPMD_module.cpp index fc4168e2568aa..a159c685b3204 100644 --- a/sycl/test-e2e/InvokeSimd/Feature/split_module/SPMD_module.cpp +++ b/sycl/test-e2e/InvokeSimd/Feature/split_module/SPMD_module.cpp @@ -25,7 +25,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Feature/void_retval.cpp b/sycl/test-e2e/InvokeSimd/Feature/void_retval.cpp index 123c6f2dbe770..5989e49dfe41d 100644 --- a/sycl/test-e2e/InvokeSimd/Feature/void_retval.cpp +++ b/sycl/test-e2e/InvokeSimd/Feature/void_retval.cpp @@ -35,7 +35,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Regression/address_space_cast.cpp b/sycl/test-e2e/InvokeSimd/Regression/address_space_cast.cpp index 3890c41037fb9..8a97eabfe7e30 100644 --- a/sycl/test-e2e/InvokeSimd/Regression/address_space_cast.cpp +++ b/sycl/test-e2e/InvokeSimd/Regression/address_space_cast.cpp @@ -65,7 +65,7 @@ bool test() { try { auto e = q.submit([&](handler &cgh) { cgh.parallel_for( - Range, [=](nd_item<1> ndi) [[intel::reqd_sub_group_size(VL)]] { + Range, [=](nd_item<1> ndi) [[sycl::reqd_sub_group_size(VL)]] { sub_group sg = ndi.get_sub_group(); group<1> g = ndi.get_group(); uint32_t i = sg.get_group_linear_id() * VL + diff --git a/sycl/test-e2e/InvokeSimd/Regression/call_vadd_1d_loop.cpp b/sycl/test-e2e/InvokeSimd/Regression/call_vadd_1d_loop.cpp index 2cd6dfc68bf0f..6ec1dbc05bc77 100644 --- a/sycl/test-e2e/InvokeSimd/Regression/call_vadd_1d_loop.cpp +++ b/sycl/test-e2e/InvokeSimd/Regression/call_vadd_1d_loop.cpp @@ -38,7 +38,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Regression/call_vadd_1d_loop_naive.cpp b/sycl/test-e2e/InvokeSimd/Regression/call_vadd_1d_loop_naive.cpp index f7b761f51d213..c1408c50f73d1 100644 --- a/sycl/test-e2e/InvokeSimd/Regression/call_vadd_1d_loop_naive.cpp +++ b/sycl/test-e2e/InvokeSimd/Regression/call_vadd_1d_loop_naive.cpp @@ -41,7 +41,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Regression/call_vadd_1d_spill.cpp b/sycl/test-e2e/InvokeSimd/Regression/call_vadd_1d_spill.cpp index 03f98feeac6d1..e2011c210eef2 100644 --- a/sycl/test-e2e/InvokeSimd/Regression/call_vadd_1d_spill.cpp +++ b/sycl/test-e2e/InvokeSimd/Regression/call_vadd_1d_spill.cpp @@ -33,7 +33,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Regression/debug_symbols.cpp b/sycl/test-e2e/InvokeSimd/Regression/debug_symbols.cpp index 05d3d6b707052..11c118dad7615 100644 --- a/sycl/test-e2e/InvokeSimd/Regression/debug_symbols.cpp +++ b/sycl/test-e2e/InvokeSimd/Regression/debug_symbols.cpp @@ -25,7 +25,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Regression/dp4a.cpp b/sycl/test-e2e/InvokeSimd/Regression/dp4a.cpp index 1904c394594e7..15be55f7dde64 100644 --- a/sycl/test-e2e/InvokeSimd/Regression/dp4a.cpp +++ b/sycl/test-e2e/InvokeSimd/Regression/dp4a.cpp @@ -28,7 +28,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(SIZE)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(SIZE)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Regression/matrix_add.cpp b/sycl/test-e2e/InvokeSimd/Regression/matrix_add.cpp index d9def83f25dc1..ebae8778eb5e8 100644 --- a/sycl/test-e2e/InvokeSimd/Regression/matrix_add.cpp +++ b/sycl/test-e2e/InvokeSimd/Regression/matrix_add.cpp @@ -29,7 +29,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Regression/matrix_multiply_USM.cpp b/sycl/test-e2e/InvokeSimd/Regression/matrix_multiply_USM.cpp index f96af712ce573..9c9c1f1b7d03f 100644 --- a/sycl/test-e2e/InvokeSimd/Regression/matrix_multiply_USM.cpp +++ b/sycl/test-e2e/InvokeSimd/Regression/matrix_multiply_USM.cpp @@ -44,7 +44,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Regression/matrix_multiply_accessor_get_pointer.cpp b/sycl/test-e2e/InvokeSimd/Regression/matrix_multiply_accessor_get_pointer.cpp index b1187b1f90821..7644c52c610e5 100644 --- a/sycl/test-e2e/InvokeSimd/Regression/matrix_multiply_accessor_get_pointer.cpp +++ b/sycl/test-e2e/InvokeSimd/Regression/matrix_multiply_accessor_get_pointer.cpp @@ -40,7 +40,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Regression/nbarrier_basic.cpp b/sycl/test-e2e/InvokeSimd/Regression/nbarrier_basic.cpp index 2923023ee6fa2..60a58576d2883 100644 --- a/sycl/test-e2e/InvokeSimd/Regression/nbarrier_basic.cpp +++ b/sycl/test-e2e/InvokeSimd/Regression/nbarrier_basic.cpp @@ -26,7 +26,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl; diff --git a/sycl/test-e2e/InvokeSimd/Regression/nbarrier_exec_in_order.cpp b/sycl/test-e2e/InvokeSimd/Regression/nbarrier_exec_in_order.cpp index f8ca79827dae9..5ab6ac48ea87d 100644 --- a/sycl/test-e2e/InvokeSimd/Regression/nbarrier_exec_in_order.cpp +++ b/sycl/test-e2e/InvokeSimd/Regression/nbarrier_exec_in_order.cpp @@ -176,7 +176,7 @@ bool test(queue q) { cgh.parallel_for>( nd_range<1>(global_range, local_range), // This test requires an explicit specification of the subgroup size - [=](nd_item<1> item) [[intel::reqd_sub_group_size(VL)]] { + [=](nd_item<1> item) [[sycl::reqd_sub_group_size(VL)]] { sycl::group<1> g = item.get_group(); sycl::sub_group sg = item.get_sub_group(); diff --git a/sycl/test-e2e/InvokeSimd/Regression/nbarrier_loop.cpp b/sycl/test-e2e/InvokeSimd/Regression/nbarrier_loop.cpp index ff2461b3aa4d2..c58b419b24feb 100644 --- a/sycl/test-e2e/InvokeSimd/Regression/nbarrier_loop.cpp +++ b/sycl/test-e2e/InvokeSimd/Regression/nbarrier_loop.cpp @@ -164,7 +164,7 @@ int main() { cgh.parallel_for( nd_range<1>(GlobalRange, LocalRange), // This test requires an explicit specification of the subgroup size - [=](nd_item<1> item) [[intel::reqd_sub_group_size(VL)]] { + [=](nd_item<1> item) [[sycl::reqd_sub_group_size(VL)]] { sycl::group<1> g = item.get_group(); sycl::sub_group sg = item.get_sub_group(); diff --git a/sycl/test-e2e/InvokeSimd/Regression/nbarrier_multiple_wg.cpp b/sycl/test-e2e/InvokeSimd/Regression/nbarrier_multiple_wg.cpp index 77cc809abab70..8ec3c6ab0cf6c 100644 --- a/sycl/test-e2e/InvokeSimd/Regression/nbarrier_multiple_wg.cpp +++ b/sycl/test-e2e/InvokeSimd/Regression/nbarrier_multiple_wg.cpp @@ -138,7 +138,7 @@ bool test(queue q) { cgh.parallel_for>( nd_range<1>(global_range, local_range), // This test requires an explicit specification of the subgroup size - [=](nd_item<1> item) [[intel::reqd_sub_group_size(VL)]] { + [=](nd_item<1> item) [[sycl::reqd_sub_group_size(VL)]] { sycl::group<1> g = item.get_group(); sycl::sub_group sg = item.get_sub_group(); diff --git a/sycl/test-e2e/InvokeSimd/Regression/slm_gather_scatter.cpp b/sycl/test-e2e/InvokeSimd/Regression/slm_gather_scatter.cpp index f4cc97d3c34b6..e2a97031469bf 100644 --- a/sycl/test-e2e/InvokeSimd/Regression/slm_gather_scatter.cpp +++ b/sycl/test-e2e/InvokeSimd/Regression/slm_gather_scatter.cpp @@ -34,7 +34,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl; diff --git a/sycl/test-e2e/InvokeSimd/Regression/slm_load_store.cpp b/sycl/test-e2e/InvokeSimd/Regression/slm_load_store.cpp index c5c92ff6ac4fc..fb981f36a3d16 100644 --- a/sycl/test-e2e/InvokeSimd/Regression/slm_load_store.cpp +++ b/sycl/test-e2e/InvokeSimd/Regression/slm_load_store.cpp @@ -34,7 +34,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl; diff --git a/sycl/test-e2e/InvokeSimd/Regression/tiled_matrix_multiplication.cpp b/sycl/test-e2e/InvokeSimd/Regression/tiled_matrix_multiplication.cpp index d7e67d28c4c1b..7b0ca06812d5d 100644 --- a/sycl/test-e2e/InvokeSimd/Regression/tiled_matrix_multiplication.cpp +++ b/sycl/test-e2e/InvokeSimd/Regression/tiled_matrix_multiplication.cpp @@ -46,7 +46,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Spec/ESIMD_to_unmarked_function.cpp b/sycl/test-e2e/InvokeSimd/Spec/ESIMD_to_unmarked_function.cpp index 75b8eef13f926..2e24ad430ffa1 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/ESIMD_to_unmarked_function.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/ESIMD_to_unmarked_function.cpp @@ -43,7 +43,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl; diff --git a/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/reference_argument.cpp b/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/reference_argument.cpp index 8f7871bc1e26e..48d40b5067ad5 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/reference_argument.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/reference_argument.cpp @@ -67,7 +67,7 @@ int main(void) { try { auto e = q.submit([&](handler &cgh) { cgh.parallel_for( - Range, [=](nd_item<1> ndi) [[intel::reqd_sub_group_size(VL)]] { + Range, [=](nd_item<1> ndi) [[sycl::reqd_sub_group_size(VL)]] { sub_group sg = ndi.get_sub_group(); group<1> g = ndi.get_group(); uint32_t i = sg.get_group_linear_id() * VL + diff --git a/sycl/test-e2e/InvokeSimd/Spec/function_overloads.cpp b/sycl/test-e2e/InvokeSimd/Spec/function_overloads.cpp index 91bf5efe0386e..71be83ded52d1 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/function_overloads.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/function_overloads.cpp @@ -37,7 +37,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Spec/multiple_SPMD_to_multiple_ESIMD.cpp b/sycl/test-e2e/InvokeSimd/Spec/multiple_SPMD_to_multiple_ESIMD.cpp index 1a1c35c726367..4348b51f77eae 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/multiple_SPMD_to_multiple_ESIMD.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/multiple_SPMD_to_multiple_ESIMD.cpp @@ -33,7 +33,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Spec/multiple_SPMD_to_single_ESIMD.cpp b/sycl/test-e2e/InvokeSimd/Spec/multiple_SPMD_to_single_ESIMD.cpp index 0ab085c7dd121..3a760ab1824d5 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/multiple_SPMD_to_single_ESIMD.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/multiple_SPMD_to_single_ESIMD.cpp @@ -33,7 +33,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Spec/nested_ESIMD_to_ESIMD.cpp b/sycl/test-e2e/InvokeSimd/Spec/nested_ESIMD_to_ESIMD.cpp index da0c18e7c1996..762f98480bcd8 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/nested_ESIMD_to_ESIMD.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/nested_ESIMD_to_ESIMD.cpp @@ -30,7 +30,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Spec/nested_SPMD_to_ESIMD.cpp b/sycl/test-e2e/InvokeSimd/Spec/nested_SPMD_to_ESIMD.cpp index ece54a2dcf430..b0c062a2d72fe 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/nested_SPMD_to_ESIMD.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/nested_SPMD_to_ESIMD.cpp @@ -29,7 +29,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Spec/simd_mask.cpp b/sycl/test-e2e/InvokeSimd/Spec/simd_mask.cpp index 1337efb63c3d4..eeabbe0f52774 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/simd_mask.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/simd_mask.cpp @@ -30,7 +30,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Spec/simd_size/Inputs/common.hpp b/sycl/test-e2e/InvokeSimd/Spec/simd_size/Inputs/common.hpp index c2c7af2f678df..5f9ecb13d2d87 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/simd_size/Inputs/common.hpp +++ b/sycl/test-e2e/InvokeSimd/Spec/simd_size/Inputs/common.hpp @@ -69,7 +69,7 @@ template bool test(QueueTY q) { try { auto e = q.submit([&](handler &cgh) { cgh.parallel_for>( - Range, [=](nd_item<1> ndi) [[intel::reqd_sub_group_size(VL)]] { + Range, [=](nd_item<1> ndi) [[sycl::reqd_sub_group_size(VL)]] { sub_group sg = ndi.get_sub_group(); group<1> g = ndi.get_group(); uint32_t i = sg.get_group_linear_id() * VL + diff --git a/sycl/test-e2e/InvokeSimd/Spec/tuple.cpp b/sycl/test-e2e/InvokeSimd/Spec/tuple.cpp index 5ff856e68e0c8..fd835a147c6f1 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/tuple.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/tuple.cpp @@ -39,7 +39,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Spec/tuple_return.cpp b/sycl/test-e2e/InvokeSimd/Spec/tuple_return.cpp index f1d1d2c462bfe..14be37cf1c435 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/tuple_return.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/tuple_return.cpp @@ -40,7 +40,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Spec/tuple_vadd.cpp b/sycl/test-e2e/InvokeSimd/Spec/tuple_vadd.cpp index e78b05d4c121b..5663aec9ff23c 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/tuple_vadd.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/tuple_vadd.cpp @@ -46,7 +46,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/Spec/uniform_retval.cpp b/sycl/test-e2e/InvokeSimd/Spec/uniform_retval.cpp index 771818933572f..bd2458ff278b8 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/uniform_retval.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/uniform_retval.cpp @@ -62,7 +62,7 @@ #ifdef IMPL_SUBGROUP #define SUBGROUP_ATTR #else -#define SUBGROUP_ATTR [[intel::reqd_sub_group_size(VL)]] +#define SUBGROUP_ATTR [[sycl::reqd_sub_group_size(VL)]] #endif using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/InvokeSimd/invoke_simd_conv.cpp b/sycl/test-e2e/InvokeSimd/invoke_simd_conv.cpp index 023bc373611c6..00d59b8fe9f8f 100644 --- a/sycl/test-e2e/InvokeSimd/invoke_simd_conv.cpp +++ b/sycl/test-e2e/InvokeSimd/invoke_simd_conv.cpp @@ -88,7 +88,7 @@ template bool test(queue q) { try { auto e = q.submit([&](handler &cgh) { cgh.parallel_for>( - Range, [=](nd_item<1> ndi) [[intel::reqd_sub_group_size(VL)]] { + Range, [=](nd_item<1> ndi) [[sycl::reqd_sub_group_size(VL)]] { sub_group sg = ndi.get_sub_group(); SpmdT val = (SpmdT)sg.get_group_linear_id(); // 0 .. GroupSize-1 SimdElemT res = 0; diff --git a/sycl/test-e2e/InvokeSimd/invoke_simd_smoke.cpp b/sycl/test-e2e/InvokeSimd/invoke_simd_smoke.cpp index e202e33a86df2..1db1a7c738be8 100644 --- a/sycl/test-e2e/InvokeSimd/invoke_simd_smoke.cpp +++ b/sycl/test-e2e/InvokeSimd/invoke_simd_smoke.cpp @@ -102,7 +102,7 @@ template bool test() { try { auto e = q.submit([&](handler &cgh) { - cgh.parallel_for(Range, [=](nd_item<1> ndi) [[intel::reqd_sub_group_size( + cgh.parallel_for(Range, [=](nd_item<1> ndi) [[sycl::reqd_sub_group_size( VL)]] { sub_group sg = ndi.get_sub_group(); group<1> g = ndi.get_group(); diff --git a/sycl/test-e2e/Matrix/element_wise_abc_impl.hpp b/sycl/test-e2e/Matrix/element_wise_abc_impl.hpp index 5caf6d3e0a3e7..59a3b9656a606 100644 --- a/sycl/test-e2e/Matrix/element_wise_abc_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_abc_impl.hpp @@ -36,7 +36,7 @@ void matrix_elem_wise_ops(big_matrix &C, big_matrix &A, nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { // The submatrix API has to be accessed by all the workitems in a diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops_half_impl.hpp b/sycl/test-e2e/Matrix/element_wise_all_ops_half_impl.hpp index d2bdcbcb2d04a..509bdf010b999 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops_half_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops_half_impl.hpp @@ -36,7 +36,7 @@ void matrix_verify_op(big_matrix &A, const R ref, OP op) { cgh.parallel_for( r, [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { const auto global_idx = spmd_item.get_global_id(0); diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp index 765ab5c54f53f..ac6dc118ce8b2 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops_impl.hpp @@ -40,7 +40,7 @@ void verify_op_ab(const T l, const T r, const float ref, OP op) { {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { const auto global_idx = spmd_item.get_global_id(0); @@ -80,7 +80,7 @@ void verify_op_c(const T l, const T r, const float ref, OP op) { {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { const auto global_idx = spmd_item.get_global_id(0); diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops_int8_impl.hpp b/sycl/test-e2e/Matrix/element_wise_all_ops_int8_impl.hpp index 8a2f1f495e41d..b4b43d789eff9 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops_int8_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops_int8_impl.hpp @@ -37,7 +37,7 @@ void matrix_verify_op(big_matrix &A, const R ref, OP op) { cgh.parallel_for( r, [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { const auto global_idx = spmd_item.get_global_id(0); diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops_int8_packed_impl.hpp b/sycl/test-e2e/Matrix/element_wise_all_ops_int8_packed_impl.hpp index 7336bb8467fa5..68069f297fa64 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops_int8_packed_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops_int8_packed_impl.hpp @@ -39,7 +39,7 @@ void matrix_verify_op(big_matrix &B, const TResult ref, OP op) { cgh.parallel_for( r, [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { const auto global_idx = spmd_item.get_global_id(0); diff --git a/sycl/test-e2e/Matrix/element_wise_all_ops_tf32_impl.hpp b/sycl/test-e2e/Matrix/element_wise_all_ops_tf32_impl.hpp index 2d7b3a36d8296..6e64c8a0c2bc6 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_ops_tf32_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_all_ops_tf32_impl.hpp @@ -35,7 +35,7 @@ void matrix_verify_op(big_matrix &A, const float ref, OP op) { cgh.parallel_for( r, [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { const auto global_idx = spmd_item.get_global_id(0); diff --git a/sycl/test-e2e/Matrix/element_wise_all_sizes_impl.hpp b/sycl/test-e2e/Matrix/element_wise_all_sizes_impl.hpp index 5228a154e9f6f..6a62c06d1dc5a 100644 --- a/sycl/test-e2e/Matrix/element_wise_all_sizes_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_all_sizes_impl.hpp @@ -46,7 +46,7 @@ void matrix_verify_add(const T1 val1, const T1 val2, const T1 result) { cgh.parallel_for( r, [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { const auto global_idx = spmd_item.get_global_id(0); diff --git a/sycl/test-e2e/Matrix/element_wise_ops_impl.hpp b/sycl/test-e2e/Matrix/element_wise_ops_impl.hpp index 5c89e16c35f5b..5a2cf801eb3ec 100644 --- a/sycl/test-e2e/Matrix/element_wise_ops_impl.hpp +++ b/sycl/test-e2e/Matrix/element_wise_ops_impl.hpp @@ -28,7 +28,7 @@ void matrix_multiply(big_matrix &C, big_matrix &A, nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { // The submatrix API has to be accessed by all the workitems in a diff --git a/sycl/test-e2e/Matrix/elemwise_irreg_size_ops_bf16.cpp b/sycl/test-e2e/Matrix/elemwise_irreg_size_ops_bf16.cpp index eb0a22fa30566..8dfe68e24bef7 100644 --- a/sycl/test-e2e/Matrix/elemwise_irreg_size_ops_bf16.cpp +++ b/sycl/test-e2e/Matrix/elemwise_irreg_size_ops_bf16.cpp @@ -50,7 +50,7 @@ void matrix_multiply(big_matrix &C, cgh.parallel_for( nd_range<2>({NDRangeM, NDRangeN * SG_SZ}, {1, 1 * SG_SZ}), [accA, accB, accC, M, N, K](nd_item<2> spmd_item) - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] { // The submatrix API has to be accessed by all the workitems in a diff --git a/sycl/test-e2e/Matrix/get_coord_float_matC_impl.hpp b/sycl/test-e2e/Matrix/get_coord_float_matC_impl.hpp index bb0b6336a0ec5..a6c7c5646a548 100644 --- a/sycl/test-e2e/Matrix/get_coord_float_matC_impl.hpp +++ b/sycl/test-e2e/Matrix/get_coord_float_matC_impl.hpp @@ -39,7 +39,7 @@ void matrix_sum_rows(big_matrix &C, T *sum_rows) { {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { // The submatrix API has to be accessed by all the workitems in a diff --git a/sycl/test-e2e/Matrix/get_coord_int8_matA_impl.hpp b/sycl/test-e2e/Matrix/get_coord_int8_matA_impl.hpp index f3d89cc717ef4..0bf9281ab9f45 100644 --- a/sycl/test-e2e/Matrix/get_coord_int8_matA_impl.hpp +++ b/sycl/test-e2e/Matrix/get_coord_int8_matA_impl.hpp @@ -90,7 +90,7 @@ void matrix_sum_rows(big_matrix &A) { cgh.parallel_for>( r, [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { const auto global_idx = spmd_item.get_global_id(0); diff --git a/sycl/test-e2e/Matrix/get_coord_int8_matB_impl.hpp b/sycl/test-e2e/Matrix/get_coord_int8_matB_impl.hpp index 8b63dadc029b3..08cb616cc6cc4 100644 --- a/sycl/test-e2e/Matrix/get_coord_int8_matB_impl.hpp +++ b/sycl/test-e2e/Matrix/get_coord_int8_matB_impl.hpp @@ -117,7 +117,7 @@ void matrix_sum_cols(big_matrix &B, cgh.parallel_for>( r, [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { const auto global_idx = spmd_item.get_global_id(0); diff --git a/sycl/test-e2e/Matrix/joint_matrix_all_sizes_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_all_sizes_impl.hpp index b3001a68bb227..816a381617a7b 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_all_sizes_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_all_sizes_impl.hpp @@ -31,7 +31,7 @@ void matrix_multiply(big_matrix &C, big_matrix &A, nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { // The submatrix API has to be accessed by all the workitems in a diff --git a/sycl/test-e2e/Matrix/joint_matrix_annotated_ptr_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_annotated_ptr_impl.hpp index 5463ea040d1eb..6da2e74d5a359 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_annotated_ptr_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_annotated_ptr_impl.hpp @@ -24,7 +24,7 @@ void matrix_multiply(T1 *C, T2 *A, T2 *B, queue &q) { nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { const auto global_idx = spmd_item.get_global_id(0); diff --git a/sycl/test-e2e/Matrix/joint_matrix_apply_bf16_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_apply_bf16_impl.hpp index 3fc96f77e020a..b1237b0894fa7 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_apply_bf16_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_apply_bf16_impl.hpp @@ -29,7 +29,7 @@ void matrix_verify_add(big_matrix &A, const TResult ref, cgh.parallel_for( r, [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { const auto global_idx = spmd_item.get_global_id(0); diff --git a/sycl/test-e2e/Matrix/joint_matrix_apply_two_matrices_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_apply_two_matrices_impl.hpp index 9751571bcbcf5..a88b0ca55416e 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_apply_two_matrices_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_apply_two_matrices_impl.hpp @@ -39,7 +39,7 @@ bool apply_two_matrices(Tc *C, Tc *D, Ta *A, Ta *Ar, queue q) { nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { const auto global_idx = spmd_item.get_global_id(0); diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_impl.hpp index 1b17dbee96a57..22ce2b3f0e16a 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_impl.hpp @@ -75,7 +75,7 @@ double joint_matmul(TOperand *A, TOperand *B, TResult *C, queue &q, int i // loop localrange [=](nd_item<2> it) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif // SG_SZ { // sg::load and sg::store expect decorations to be ON diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_load_store_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_load_store_impl.hpp index b9f474b9758dc..bd4a66d2c4dc9 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_load_store_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_load_store_impl.hpp @@ -18,7 +18,7 @@ void joint_B_rowmajor_load_store(Tb *B, Tb *OutB, queue &q) { h.parallel_for( nd_range<1>{global, local}, [=](nd_item<1> it) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { auto pB = diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_pair_load_store_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_pair_load_store_impl.hpp index 0cacda21b98e2..a7a502a3c16a1 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_pair_load_store_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_rowmajorB_pair_load_store_impl.hpp @@ -18,7 +18,7 @@ void joint_B_rowmajor_pair_load_store(Tb *B, Tb *OutB, queue &q) { h.parallel_for( nd_range<1>{global, local}, [=](nd_item<1> it) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { auto pB = diff --git a/sycl/test-e2e/Matrix/joint_matrix_bfloat16_array_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_bfloat16_array_impl.hpp index f393eaa5e8436..de2d1d89deaf5 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bfloat16_array_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bfloat16_array_impl.hpp @@ -31,7 +31,7 @@ void matrix_multiply(big_matrix &C, big_matrix &A, nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { // Matrix API has to be accessed by all the workitems in a diff --git a/sycl/test-e2e/Matrix/joint_matrix_bfloat16_colmajorA_colmajorB_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_bfloat16_colmajorA_colmajorB_impl.hpp index e3234da2cd5d9..d8f5e45474a77 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bfloat16_colmajorA_colmajorB_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bfloat16_colmajorA_colmajorB_impl.hpp @@ -29,7 +29,7 @@ void matrix_multiply(big_matrix &C, big_matrix &A, nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { // The submatrix API has to be accessed by all the workitems in a diff --git a/sycl/test-e2e/Matrix/joint_matrix_bfloat16_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_bfloat16_impl.hpp index ede4e795d0d69..00e804cef2fb5 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bfloat16_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bfloat16_impl.hpp @@ -29,7 +29,7 @@ void matrix_multiply(big_matrix &C, big_matrix &A, nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { // The submatrix API has to be accessed by all the workitems in a diff --git a/sycl/test-e2e/Matrix/joint_matrix_bfloat16_packedB_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_bfloat16_packedB_impl.hpp index 6a7182c41985d..85d33f2c83173 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bfloat16_packedB_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bfloat16_packedB_impl.hpp @@ -27,7 +27,7 @@ void matrix_multiply(big_matrix &C, big_matrix &A, nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { // The submatrix API has to be accessed by all the workitems in a diff --git a/sycl/test-e2e/Matrix/joint_matrix_colA_rowB_colC_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_colA_rowB_colC_impl.hpp index 373ec652cc063..65b091477cae5 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_colA_rowB_colC_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_colA_rowB_colC_impl.hpp @@ -31,7 +31,7 @@ void matrix_multiply(T1 *C, T2 *A, T2 *B, queue q) { nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { auto pA = diff --git a/sycl/test-e2e/Matrix/joint_matrix_down_convert_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_down_convert_impl.hpp index 8ac48511c7e10..8c93114cf019a 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_down_convert_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_down_convert_impl.hpp @@ -32,7 +32,7 @@ void matrix_copy(big_matrix &C, big_matrix &A) { nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { // The submatrix API has to be accessed by all the workitems in a diff --git a/sycl/test-e2e/Matrix/joint_matrix_half_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_half_impl.hpp index 69ee6d4da5464..e51e7c30fa810 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_half_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_half_impl.hpp @@ -28,7 +28,7 @@ void matrix_multiply(big_matrix &C, big_matrix &A, nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { // The submatrix API has to be accessed by all the workitems in a diff --git a/sycl/test-e2e/Matrix/joint_matrix_int8_colmajorA_colmajorB_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_int8_colmajorA_colmajorB_impl.hpp index 1390f8225406c..b7b55fa42a929 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_int8_colmajorA_colmajorB_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_int8_colmajorA_colmajorB_impl.hpp @@ -35,7 +35,7 @@ void matrix_multiply(big_matrix &C, nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { // The submatrix API has to be accessed by all the workitems in a diff --git a/sycl/test-e2e/Matrix/joint_matrix_int8_rowmajorA_rowmajorB_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_int8_rowmajorA_rowmajorB_impl.hpp index 12f91f083def4..187e199d9b14d 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_int8_rowmajorA_rowmajorB_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_int8_rowmajorA_rowmajorB_impl.hpp @@ -28,7 +28,7 @@ void matrix_multiply(big_matrix &C, big_matrix &A, nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { // The submatrix API has to be accessed by all the workitems in a diff --git a/sycl/test-e2e/Matrix/joint_matrix_opt_kernel_feature_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_opt_kernel_feature_impl.hpp index 7aba5911c8386..e07d151afe7af 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_opt_kernel_feature_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_opt_kernel_feature_impl.hpp @@ -32,7 +32,7 @@ void matrix_multiply(big_matrix &C, big_matrix &A, nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { const auto global_idx = spmd_item.get_global_id(0); diff --git a/sycl/test-e2e/Matrix/joint_matrix_out_bounds_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_out_bounds_impl.hpp index 000984087eed1..d8c02b2dc36fb 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_out_bounds_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_out_bounds_impl.hpp @@ -33,7 +33,7 @@ void matrix_multiply(T1 *C, T2 *A, T2 *B, queue q) { nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { auto pA = diff --git a/sycl/test-e2e/Matrix/joint_matrix_prefetch_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_prefetch_impl.hpp index 1e665f618860f..03dfa4649d6b3 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_prefetch_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_prefetch_impl.hpp @@ -74,7 +74,7 @@ void matrix_multiply(T *C, T1 *A, T2 *B, queue q) { nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { const auto global_idx = spmd_item.get_global_id(0); diff --git a/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp b/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp index 2dd752b0a9c78..b0a09f05f6a1a 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_query_default.cpp @@ -66,7 +66,7 @@ void matrix_multiply(big_matrix &C, cgh.parallel_for( nd_range<2>({NDRangeM, NDRangeN * SG_SZ}, {1, 1 * SG_SZ}), [accA, accB, accC, M, N, K](nd_item<2> spmd_item) - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] { // The submatrix API has to be accessed by all the workitems in a diff --git a/sycl/test-e2e/Matrix/joint_matrix_rowmajorA_rowmajorB_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_rowmajorA_rowmajorB_impl.hpp index 44ad7b5910076..87fbc1e90a386 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_rowmajorA_rowmajorB_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_rowmajorA_rowmajorB_impl.hpp @@ -27,7 +27,7 @@ void matrix_multiply(big_matrix &C, big_matrix &A, nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { // The submatrix API has to be accessed by all the workitems in a diff --git a/sycl/test-e2e/Matrix/joint_matrix_ss_int8_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_ss_int8_impl.hpp index 974a489002b47..ef08722a3f8ac 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_ss_int8_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_ss_int8_impl.hpp @@ -29,7 +29,7 @@ void matrix_multiply(big_matrix &C, big_matrix &A, nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { // The submatrix API has to be accessed by all the workitems in a diff --git a/sycl/test-e2e/Matrix/joint_matrix_su_int8_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_su_int8_impl.hpp index 17c9d47f61c36..5234af6b812ea 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_su_int8_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_su_int8_impl.hpp @@ -29,7 +29,7 @@ void matrix_multiply(big_matrix &C, big_matrix &A, nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { // The submatrix API has to be accessed by all the workitems in a diff --git a/sycl/test-e2e/Matrix/joint_matrix_tf32_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_tf32_impl.hpp index 9fb40a78f8b30..02f3df63b1efd 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_tf32_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_tf32_impl.hpp @@ -37,7 +37,7 @@ void matrix_multiply(big_matrix &C, nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { // The matrix API has to be accessed by all the workitems in a diff --git a/sycl/test-e2e/Matrix/joint_matrix_transposeC_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_transposeC_impl.hpp index 278e5da5cf441..58505b6fd4fb6 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_transposeC_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_transposeC_impl.hpp @@ -28,7 +28,7 @@ void matrix_load_and_store(T1 *input, T1 *out_col_major, T1 *out_row_major, nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { auto p_input = diff --git a/sycl/test-e2e/Matrix/joint_matrix_us_int8_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_us_int8_impl.hpp index baef5c195a1e6..1a82d390cea8e 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_us_int8_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_us_int8_impl.hpp @@ -29,7 +29,7 @@ void matrix_multiply(big_matrix &C, big_matrix &A, nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { // The submatrix API has to be accessed by all the workitems in a diff --git a/sycl/test-e2e/Matrix/joint_matrix_uu_int8_impl.hpp b/sycl/test-e2e/Matrix/joint_matrix_uu_int8_impl.hpp index 090b552848f0b..d86ff267e3f64 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_uu_int8_impl.hpp +++ b/sycl/test-e2e/Matrix/joint_matrix_uu_int8_impl.hpp @@ -29,7 +29,7 @@ void matrix_multiply(big_matrix &C, big_matrix &A, nd_range<2>({NDRangeM, NDRangeN * sg_size}, {1, 1 * sg_size}), [=](nd_item<2> spmd_item) #ifdef SG_SZ - [[intel::reqd_sub_group_size(SG_SZ)]] + [[sycl::reqd_sub_group_size(SG_SZ)]] #endif { // The submatrix API has to be accessed by all the workitems in a diff --git a/sycl/test-e2e/SubGroupMask/Basic.cpp b/sycl/test-e2e/SubGroupMask/Basic.cpp index d82a507b18dc9..40526c7ccad7e 100644 --- a/sycl/test-e2e/SubGroupMask/Basic.cpp +++ b/sycl/test-e2e/SubGroupMask/Basic.cpp @@ -35,7 +35,7 @@ int main() { auto resacc = resbuf.get_access(cgh); cgh.parallel_for( - NdRange, [=](nd_item<1> NdItem) [[intel::reqd_sub_group_size(32)]] { + NdRange, [=](nd_item<1> NdItem) [[sycl::reqd_sub_group_size(32)]] { size_t GID = NdItem.get_global_linear_id(); auto SG = NdItem.get_sub_group(); // AAAAAAAA diff --git a/sycl/test-e2e/SubGroupMask/GroupSize.cpp b/sycl/test-e2e/SubGroupMask/GroupSize.cpp index b61fa4aa0e69a..827dd6d351274 100644 --- a/sycl/test-e2e/SubGroupMask/GroupSize.cpp +++ b/sycl/test-e2e/SubGroupMask/GroupSize.cpp @@ -37,8 +37,8 @@ template void test(queue Queue) { auto resacc = resbuf.template get_access(cgh); cgh.parallel_for>( - NdRange, [= - ](nd_item<1> NdItem) [[intel::reqd_sub_group_size(SGSize)]] { + NdRange, + [=](nd_item<1> NdItem) [[sycl::reqd_sub_group_size(SGSize)]] { auto SG = NdItem.get_sub_group(); auto LID = SG.get_local_id(); auto SGID = SG.get_group_id(); diff --git a/sycl/test-e2e/syclcompat/util/util_logical_group.cpp b/sycl/test-e2e/syclcompat/util/util_logical_group.cpp index db387075fe2c9..8bf7ce8238379 100644 --- a/sycl/test-e2e/syclcompat/util/util_logical_group.cpp +++ b/sycl/test-e2e/syclcompat/util/util_logical_group.cpp @@ -68,7 +68,7 @@ void test_logical_group() { result_device = sycl::malloc_device(4, q_ct1); q_ct1.parallel_for( sycl::nd_range<3>(sycl::range<3>(1, 1, 52), sycl::range<3>(1, 1, 52)), - [=](sycl::nd_item<3> item_ct1) [[intel::reqd_sub_group_size(32)]] { + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(32)]] { kernel(result_device, item_ct1); }); q_ct1.memcpy(result_host, result_device, sizeof(unsigned int) * 4).wait(); diff --git a/sycl/test-e2e/syclcompat/util/util_match_all_over_group.cpp b/sycl/test-e2e/syclcompat/util/util_match_all_over_group.cpp index 7d72c0a5b39f8..a1abe3bae1ed0 100644 --- a/sycl/test-e2e/syclcompat/util/util_match_all_over_group.cpp +++ b/sycl/test-e2e/syclcompat/util/util_match_all_over_group.cpp @@ -92,7 +92,7 @@ void test_match_all_over_group() { sycl::queue q = syclcompat::get_default_queue(); q.parallel_for( sycl::nd_range<1>(threads.size(), threads.size()), - [=](sycl::nd_item<1> item) [[intel::reqd_sub_group_size(SUBGROUP_SIZE)]] { + [=](sycl::nd_item<1> item) [[sycl::reqd_sub_group_size(SUBGROUP_SIZE)]] { for (auto id = item.get_global_linear_id(); id < DATA_SIZE; id += SUBGROUP_SIZE) d_output[id] = syclcompat::match_all_over_sub_group( diff --git a/sycl/test-e2e/syclcompat/util/util_match_any_over_group.cpp b/sycl/test-e2e/syclcompat/util/util_match_any_over_group.cpp index bee9ecc21272f..3a78768599f65 100644 --- a/sycl/test-e2e/syclcompat/util/util_match_any_over_group.cpp +++ b/sycl/test-e2e/syclcompat/util/util_match_any_over_group.cpp @@ -79,7 +79,7 @@ void test_match_any_over_group() { sycl::queue q = syclcompat::get_default_queue(); q.parallel_for( sycl::nd_range<1>(grid.size() * threads.size(), threads.size()), - [=](sycl::nd_item<1> item) [[intel::reqd_sub_group_size(SUBGROUP_SIZE)]] { + [=](sycl::nd_item<1> item) [[sycl::reqd_sub_group_size(SUBGROUP_SIZE)]] { auto id = item.get_global_linear_id(); d_output[id] = syclcompat::match_any_over_sub_group( item.get_sub_group(), member_mask, d_input[id]); diff --git a/sycl/test-e2e/syclcompat/util/util_permute_sub_group_by_xor.cpp b/sycl/test-e2e/syclcompat/util/util_permute_sub_group_by_xor.cpp index 18656e205ba25..7b877d826f18b 100644 --- a/sycl/test-e2e/syclcompat/util/util_permute_sub_group_by_xor.cpp +++ b/sycl/test-e2e/syclcompat/util/util_permute_sub_group_by_xor.cpp @@ -112,7 +112,7 @@ void test_permute_sub_group_by_xor() { q_ct1->parallel_for(sycl::nd_range<3>(GridSize * BlockSize, BlockSize), [=](sycl::nd_item<3> item_ct1) - [[intel::reqd_sub_group_size(32)]] { + [[sycl::reqd_sub_group_size(32)]] { permute_sub_group_by_xor1(dev_data_u, item_ct1); }); @@ -139,7 +139,7 @@ void test_permute_sub_group_by_xor() { .wait(); q_ct1->parallel_for(sycl::nd_range<3>(GridSize * BlockSize, BlockSize), [=](sycl::nd_item<3> item_ct1) - [[intel::reqd_sub_group_size(32)]] { + [[sycl::reqd_sub_group_size(32)]] { permute_sub_group_by_xor2(dev_data_u, item_ct1); }); diff --git a/sycl/test-e2e/syclcompat/util/util_select_from_sub_group.cpp b/sycl/test-e2e/syclcompat/util/util_select_from_sub_group.cpp index 592a1932b7ea2..ffad55f257430 100644 --- a/sycl/test-e2e/syclcompat/util/util_select_from_sub_group.cpp +++ b/sycl/test-e2e/syclcompat/util/util_select_from_sub_group.cpp @@ -108,7 +108,7 @@ void test_select_from_sub_group() { .wait(); q_ct1->parallel_for(sycl::nd_range<3>(GridSize * BlockSize, BlockSize), [=](sycl::nd_item<3> item_ct1) - [[intel::reqd_sub_group_size(32)]] { + [[sycl::reqd_sub_group_size(32)]] { select_from_sub_group1(dev_data_u, item_ct1); }); @@ -134,7 +134,7 @@ void test_select_from_sub_group() { .wait(); q_ct1->parallel_for(sycl::nd_range<3>(GridSize * BlockSize, BlockSize), [=](sycl::nd_item<3> item_ct1) - [[intel::reqd_sub_group_size(32)]] { + [[sycl::reqd_sub_group_size(32)]] { select_from_sub_group2(dev_data_u, item_ct1); }); diff --git a/sycl/test-e2e/syclcompat/util/util_shift_sub_group_left.cpp b/sycl/test-e2e/syclcompat/util/util_shift_sub_group_left.cpp index 81c2bd2c9616d..0fac1ee013d06 100644 --- a/sycl/test-e2e/syclcompat/util/util_shift_sub_group_left.cpp +++ b/sycl/test-e2e/syclcompat/util/util_shift_sub_group_left.cpp @@ -107,7 +107,7 @@ void test_shift_sub_group_left() { .wait(); q_ct1->parallel_for(sycl::nd_range<3>(GridSize * BlockSize, BlockSize), [=](sycl::nd_item<3> item_ct1) - [[intel::reqd_sub_group_size(32)]] { + [[sycl::reqd_sub_group_size(32)]] { shift_sub_group_left1(dev_data_u, item_ct1); }); dev_ct1.queues_wait_and_throw(); @@ -134,7 +134,7 @@ void test_shift_sub_group_left() { q_ct1->parallel_for(sycl::nd_range<3>(GridSize * BlockSize, BlockSize), [=](sycl::nd_item<3> item_ct1) - [[intel::reqd_sub_group_size(32)]] { + [[sycl::reqd_sub_group_size(32)]] { shift_sub_group_left2(dev_data_u, item_ct1); }); diff --git a/sycl/test-e2e/syclcompat/util/util_shift_sub_group_right.cpp b/sycl/test-e2e/syclcompat/util/util_shift_sub_group_right.cpp index 5204586df79a1..0dbc985170f03 100644 --- a/sycl/test-e2e/syclcompat/util/util_shift_sub_group_right.cpp +++ b/sycl/test-e2e/syclcompat/util/util_shift_sub_group_right.cpp @@ -108,7 +108,7 @@ void test_shift_sub_group_right() { .wait(); q_ct1->parallel_for(sycl::nd_range<3>(GridSize * BlockSize, BlockSize), [=](sycl::nd_item<3> item_ct1) - [[intel::reqd_sub_group_size(32)]] { + [[sycl::reqd_sub_group_size(32)]] { shift_sub_group_right1(dev_data_u, item_ct1); }); @@ -135,7 +135,7 @@ void test_shift_sub_group_right() { q_ct1->parallel_for(sycl::nd_range<3>(GridSize * BlockSize, BlockSize), [=](sycl::nd_item<3> item_ct1) - [[intel::reqd_sub_group_size(32)]] { + [[sycl::reqd_sub_group_size(32)]] { shift_sub_group_right2(dev_data_u, item_ct1); }); diff --git a/sycl/test/check_device_code/matrix/matrix-int8-test.cpp b/sycl/test/check_device_code/matrix/matrix-int8-test.cpp index 30008cf5b99fb..ee56a0c73fe61 100644 --- a/sycl/test/check_device_code/matrix/matrix-int8-test.cpp +++ b/sycl/test/check_device_code/matrix/matrix-int8-test.cpp @@ -27,7 +27,7 @@ using namespace sycl::ext::oneapi::experimental::matrix; // int8_t B[MATRIX_K / 4][MATRIX_N * 4]; // int32_t C[MATRIX_M][MATRIX_N]; -SYCL_EXTERNAL [[intel::reqd_sub_group_size(SG_SZ)]] void +SYCL_EXTERNAL [[sycl::reqd_sub_group_size(SG_SZ)]] void matrix_multiply(size_t NUM_COLS_C, size_t NUM_COLS_A, sycl::accessor accA, sycl::accessor accB, diff --git a/sycl/test/check_device_code/matrix/matrix_load_store_as.cpp b/sycl/test/check_device_code/matrix/matrix_load_store_as.cpp index 34fae66a8f09a..771235690ac8d 100644 --- a/sycl/test/check_device_code/matrix/matrix_load_store_as.cpp +++ b/sycl/test/check_device_code/matrix/matrix_load_store_as.cpp @@ -9,7 +9,7 @@ using namespace sycl; using namespace sycl::ext::oneapi::experimental::matrix; -SYCL_EXTERNAL [[intel::reqd_sub_group_size(16)]] void matrix_store_as( +SYCL_EXTERNAL [[sycl::reqd_sub_group_size(16)]] void matrix_store_as( multi_ptr pA, multi_ptr pB, multi_ptr pC, diff --git a/sycl/test/esimd/slm_init_invoke_simd.cpp b/sycl/test/esimd/slm_init_invoke_simd.cpp index bc57240644002..899e498de807c 100644 --- a/sycl/test/esimd/slm_init_invoke_simd.cpp +++ b/sycl/test/esimd/slm_init_invoke_simd.cpp @@ -25,7 +25,7 @@ SYCL_EXTERNAL int main() { queue Q; nd_range<1> NDR{range<1>{2}, range<1>{2}}; - Q.parallel_for(NDR, [=](nd_item<1> NDI) [[intel::reqd_sub_group_size(16)]] { + Q.parallel_for(NDR, [=](nd_item<1> NDI) [[sycl::reqd_sub_group_size(16)]] { sub_group sg = NDI.get_sub_group(); invoke_simd(sg, SIMD_CALLEE_VOID); }).wait(); diff --git a/sycl/test/extensions/inline_asm.cpp b/sycl/test/extensions/inline_asm.cpp index 613484cc0474c..0ce33c8d7dc7a 100644 --- a/sycl/test/extensions/inline_asm.cpp +++ b/sycl/test/extensions/inline_asm.cpp @@ -38,8 +38,8 @@ int main() { auto B = BufB.get_access(cgh); auto C = BufC.get_access(cgh); cgh.parallel_for( - sycl::range<1>{DEFAULT_PROBLEM_SIZE}, [= - ](sycl::id<1> wiID) [[intel::reqd_sub_group_size(8)]] { + sycl::range<1>{DEFAULT_PROBLEM_SIZE}, + [=](sycl::id<1> wiID) [[sycl::reqd_sub_group_size(8)]] { #if defined(__SYCL_DEVICE_ONLY__) asm volatile( ".decl P1 v_type=P num_elts=8\n" @@ -58,10 +58,10 @@ int main() { : "+rw"(C[wiID]) : "rw"(A[wiID]), "rw"(B[wiID])); #else - C[wiID] = 0; - for (int i = 0; i < A[wiID]; ++i) { - C[wiID] = C[wiID] + B[wiID]; - } + C[wiID] = 0; + for (int i = 0; i < A[wiID]; ++i) { + C[wiID] = C[wiID] + B[wiID]; + } #endif }); }); diff --git a/sycl/test/invoke_simd/invoke_simd.cpp b/sycl/test/invoke_simd/invoke_simd.cpp index 33017aa2d9337..a7d3e11983ebb 100644 --- a/sycl/test/invoke_simd/invoke_simd.cpp +++ b/sycl/test/invoke_simd/invoke_simd.cpp @@ -110,7 +110,7 @@ int main(void) { try { auto e = q.submit([&](handler &cgh) { cgh.parallel_for( - Range, [=](nd_item<1> ndi) [[intel::reqd_sub_group_size(VL)]] { + Range, [=](nd_item<1> ndi) [[sycl::reqd_sub_group_size(VL)]] { sub_group sg = ndi.get_sub_group(); group<1> g = ndi.get_group(); uint32_t i = diff --git a/sycl/test/invoke_simd/invoke_simd_address_space_inferral.cpp b/sycl/test/invoke_simd/invoke_simd_address_space_inferral.cpp index 43de05867ec03..15bc6f3466ef1 100644 --- a/sycl/test/invoke_simd/invoke_simd_address_space_inferral.cpp +++ b/sycl/test/invoke_simd/invoke_simd_address_space_inferral.cpp @@ -50,7 +50,7 @@ bool test() { try { auto e = q.submit([&](handler &cgh) { local_accessor LocalAcc(Size, cgh); - cgh.parallel_for(Range, [=](nd_item<1> item) [[intel::reqd_sub_group_size( + cgh.parallel_for(Range, [=](nd_item<1> item) [[sycl::reqd_sub_group_size( VL)]] { sycl::group<1> g = item.get_group(); sycl::sub_group sg = item.get_sub_group(); diff --git a/sycl/test/matrix/matrix-bfloat16-test-coord-basicB.cpp b/sycl/test/matrix/matrix-bfloat16-test-coord-basicB.cpp index 02cfbc0f8b904..baefb8d602487 100644 --- a/sycl/test/matrix/matrix-bfloat16-test-coord-basicB.cpp +++ b/sycl/test/matrix/matrix-bfloat16-test-coord-basicB.cpp @@ -147,7 +147,7 @@ void matrix_sum_cols(queue q, big_matrix &B, nd_range<2> &r) { auto os = sycl::stream(100000, 6144, cgh); cgh.parallel_for( - r, [=](nd_item<2> spmd_item) [[intel::reqd_sub_group_size(SG_SZ)]] { + r, [=](nd_item<2> spmd_item) [[sycl::reqd_sub_group_size(SG_SZ)]] { const auto global_idx = spmd_item.get_global_id(0); const auto global_idy = spmd_item.get_global_id(1); const auto sg_startx = global_idx - spmd_item.get_local_id(0); diff --git a/sycl/test/matrix/matrix-tf32-test.cpp b/sycl/test/matrix/matrix-tf32-test.cpp index fb2e7ab66492d..5e8413346c3d0 100644 --- a/sycl/test/matrix/matrix-tf32-test.cpp +++ b/sycl/test/matrix/matrix-tf32-test.cpp @@ -47,7 +47,7 @@ void matrix_multiply(big_matrix &C, cgh.parallel_for( nd_range<2>({NDRangeM, NDRangeN * SG_SZ}, {1, 1 * SG_SZ}), - [=](nd_item<2> spmd_item) [[intel::reqd_sub_group_size(SG_SZ)]] + [=](nd_item<2> spmd_item) [[sycl::reqd_sub_group_size(SG_SZ)]] { // The matrix API has to be accessed by all the workitems in a diff --git a/sycl/test/warnings/warnings.cpp b/sycl/test/warnings/warnings.cpp index 2fb70f50ddb23..86bc5789e44c2 100644 --- a/sycl/test/warnings/warnings.cpp +++ b/sycl/test/warnings/warnings.cpp @@ -4,6 +4,7 @@ #include using namespace sycl; + int main() { vec newVec; queue myQueue; @@ -39,4 +40,4 @@ template class device_image; template class device_image; template class device_image; -} +} // namespace sycl From 47b5ed2f1c6961cdcb31769efd08af7d7ab1ac8b Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Tue, 29 Oct 2024 17:45:44 +0100 Subject: [PATCH 9/9] Apply changes from https://github.com/intel/llvm/pull/15914 --- libclc/CMakeLists.txt | 109 ++++++++++++------- libclc/cmake/modules/AddLibclc.cmake | 151 +++++++++++++++++---------- libclc/generic/lib/SOURCES | 1 - 3 files changed, 167 insertions(+), 94 deletions(-) diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index cf456a19cf4db..09845614b21dc 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -181,7 +181,7 @@ if( "spirv-mesa3d-" IN_LIST LIBCLC_TARGETS_TO_BUILD OR "spirv64-mesa3d-" IN_LIST endif() add_custom_target(libspirv-builtins COMMENT "Build libspirv builtins") -add_custom_target(libclc-builtins COMMENT "Build libclc builtins") +add_custom_target(libopencl-builtins COMMENT "Build libclc builtins") set(LIBCLC_TARGET_TO_TEST) @@ -329,19 +329,40 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) " configuration, some SYCL programs may fail to build.") endif() - set( lib_files ) - set( lib_gen_files ) - libclc_configure_lib_source(lib_files lib_gen_files - LIB_DIR lib + set( opencl_lib_files ) + set( opencl_gen_files ) + + if( NOT ARCH STREQUAL spirv AND NOT ARCH STREQUAL spirv64 ) + if( ARCH STREQUAL clspv OR ARCH STREQUAL clspv64 ) + list( APPEND opencl_gen_files clspv-convert.cl ) + elseif ( NOT ENABLE_RUNTIME_SUBNORMAL ) + list( APPEND opencl_gen_files convert-clc.cl ) + list( APPEND opencl_lib_files generic/libspirv/subnormal_use_default.ll ) + endif() + endif() + + libclc_configure_lib_source( + opencl_lib_files DIRS ${dirs} ${DARCH} ${DARCH}-${OS} ${DARCH}-${VENDOR}-${OS} - DEPS convert-clc.cl ) + ) - set( libspirv_files ) + set( libspirv_lib_files ) set( libspirv_gen_files ) - libclc_configure_lib_source(libspirv_files libspirv_gen_files + + if( NOT ARCH STREQUAL spirv AND NOT ARCH STREQUAL spirv64 ) + if( ARCH STREQUAL clspv OR ARCH STREQUAL clspv64 ) + list( APPEND libspirv_gen_files clspv-convert.cl ) + elseif ( NOT ENABLE_RUNTIME_SUBNORMAL ) + list( APPEND libspirv_gen_files convert-spirv.cl convert-core.cl ) + list( APPEND libspirv_lib_files generic/libspirv/subnormal_use_default.ll ) + endif() + endif() + + libclc_configure_lib_source( + libspirv_lib_files LIB_DIR libspirv DIRS ${dirs} ${DARCH} ${DARCH}-${OS} ${DARCH}-${VENDOR}-${OS} - DEPS convert-spirv.cl convert-core.cl) + ) foreach( d ${${t}_devices} ) get_libclc_device_info( @@ -353,29 +374,25 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) ) # Some targets don't have a specific GPU to target - set( flags ) + set( build_flags ) if( d STREQUAL none OR ARCH STREQUAL spirv OR ARCH STREQUAL spirv64 ) # FIXME: Ideally we would not be tied to a specific PTX ISA version if( ARCH STREQUAL nvptx OR ARCH STREQUAL nvptx64 ) # Disables NVVM reflection to defer to after linking - list( APPEND flags -Xclang -target-feature -Xclang +ptx72 + list( APPEND build_flags -Xclang -target-feature -Xclang +ptx72 -march=sm_86 -mllvm --nvvm-reflect-enable=false) elseif( ARCH STREQUAL amdgcn ) # AMDGCN needs libclc to be compiled to high bc version since all atomic # clang builtins need to be accessible - list( APPEND flags -mcpu=gfx940 -mllvm --amdgpu-oclc-reflect-enable=false ) + list( APPEND build_flags -mcpu=gfx940 -mllvm --amdgpu-oclc-reflect-enable=false ) elseif( IS_NATIVE_CPU_ARCH ) - list( APPEND flags -Xclang -fsycl-is-native-cpu ) + list( APPEND build_flags -Xclang -fsycl-is-native-cpu ) if( ARCH STREQUAL x86_64 ) - list( APPEND flags ${LIBCLC_NATIVECPU_FLAGS_X86_64}) + list( APPEND build_flags ${LIBCLC_NATIVECPU_FLAGS_X86_64}) endif() endif() endif() - if( NOT "${cpu}" STREQUAL "" ) - list( APPEND flags -mcpu=${cpu} ) - endif() - message( STATUS " device: ${d} ( ${${d}_aliases} )" ) # Note: when declaring builtins, we must consider that even if a target @@ -410,7 +427,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) # Enable SPIR-V builtin function declarations, so they don't # have to be explicity declared in the soruce. - list( APPEND flags -Xclang -fdeclare-spirv-builtins) + list( APPEND build_flags -Xclang -fdeclare-spirv-builtins) set( LIBCLC_ARCH_OBJFILE_DIR "${LIBCLC_OBJFILE_DIR}/${arch_suffix}" ) file( MAKE_DIRECTORY ${LIBCLC_ARCH_OBJFILE_DIR} ) @@ -428,7 +445,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) if( supports_generic_addrspace ) string( APPEND CL_3_0_EXTENSIONS ",+__opencl_c_generic_address_space" ) if( has_distinct_generic_addrspace ) - list( APPEND flags -D__CLC_DISTINCT_GENERIC_ADDRSPACE__ ) + list( APPEND build_flags -D__CLC_DISTINCT_GENERIC_ADDRSPACE__ ) endif() else() # Explictly disable opencl_c_generic_address_space (it may be enabled @@ -438,42 +455,60 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) string( APPEND CL_3_0_EXTENSIONS ",-__opencl_c_pipes" ) string( APPEND CL_3_0_EXTENSIONS ",-__opencl_c_device_enqueue" ) endif() - list( APPEND flags -cl-std=CL3.0 "-Xclang" ${CL_3_0_EXTENSIONS} ) + list( APPEND build_flags -cl-std=CL3.0 "-Xclang" ${CL_3_0_EXTENSIONS} ) # Add platform specific flags if(WIN32) - list(APPEND flags -D_WIN32) + list(APPEND build_flags -D_WIN32) elseif(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") - list(APPEND flags -D__APPLE__) + list(APPEND build_flags -D__APPLE__) elseif(${CMAKE_SYSTEM_NAME} MATCHES "Linux") - list(APPEND flags -D__unix__ -D__linux__) + list(APPEND build_flags -D__unix__ -D__linux__) else() # Assume some UNIX system otherwise - list(APPEND flags -D__unix__) + list(APPEND build_flags -D__unix__) endif() - add_libclc_builtin_set(libspirv-${arch_suffix} + string( TOUPPER "CLC_${ARCH}" CLC_TARGET_DEFINE ) + + list( APPEND build_flags + -D__CLC_INTERNAL + -D${CLC_TARGET_DEFINE} + -I${CMAKE_CURRENT_SOURCE_DIR}/generic/include + # FIXME: Fix libclc to not require disabling this noisy warning + -Wno-bitwise-conditional-parentheses + ) + + if( NOT "${cpu}" STREQUAL "" ) + list( APPEND build_flags -mcpu=${cpu} ) + endif() + + add_libclc_builtin_set( + ARCH ${ARCH} + ARCH_SUFFIX libspirv-${arch_suffix} TRIPLE ${clang_triple} - TARGET_ENV libspirv - COMPILE_OPT ${flags} + TARGET_ENV libspirv- + COMPILE_FLAGS ${build_flags} OPT_FLAGS ${opt_flags} - FILES ${libspirv_files} + LIB_FILES ${libspirv_lib_files} GEN_FILES ${libspirv_gen_files} ALIASES ${${d}_aliases} GENERATE_TARGET "generate_convert_spirv.cl" "generate_convert_core.cl" - PARENT_TARGET libspirv-builtins) + PARENT_TARGET libspirv-builtins + ) - add_libclc_builtin_set(clc-${arch_suffix} + add_libclc_builtin_set( + ARCH ${ARCH} + ARCH_SUFFIX ${arch_suffix} TRIPLE ${clang_triple} - TARGET_ENV clc - COMPILE_OPT ${flags} + COMPILE_FLAGS ${build_flags} OPT_FLAGS ${opt_flags} - FILES ${lib_files} - GEN_FILES ${lib_gen_files} - LIB_DEP libspirv-${arch_suffix} + LIB_FILES ${opencl_lib_files} + GEN_FILES ${opencl_gen_files} ALIASES ${${d}_aliases} GENERATE_TARGET "generate_convert_clc.cl" - PARENT_TARGET libclc-builtins) + PARENT_TARGET libopencl-builtins + ) endforeach( d ) endforeach( t ) diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake index dc4c6c47a66e8..50c25961d0310 100644 --- a/libclc/cmake/modules/AddLibclc.cmake +++ b/libclc/cmake/modules/AddLibclc.cmake @@ -266,39 +266,48 @@ function(process_bc out_file) ) endfunction() -# add_libclc_builtin_set(arch_suffix -# TRIPLE string -# Triple used to compile -# TARGET_ENV string -# "clc" or "libspirv" -# FILES string ... -# List of file that should be built for this library -# ALIASES string ... -# List of alises -# COMPILE_OPT -# Compilation options -# LIB_DEP -# Library to include to the builtin set -# ) -macro(add_libclc_builtin_set arch_suffix) +# Compiles a list of library source files (provided by LIB_FILES/GEN_FILES) and +# compiles them to LLVM bytecode (or SPIR-V), links them together and optimizes +# them. +# +# For bytecode libraries, a list of ALIASES may optionally be provided to +# produce additional symlinks. +# +# Arguments: +# * ARCH +# libclc architecture being built +# * ARCH_SUFFIX +# libclc architecture/triple suffix +# * TRIPLE +# Triple used to compile +# +# Optional Arguments: +# * LIB_FILES ... +# List of files that should be built for this library +# * GEN_FILES ... +# List of generated files (in build dir) that should be built for this library +# * COMPILE_FLAGS ... +# Compilation options (for clang) +# * OPT_FLAGS ... +# Optimization options (for opt) +# * TARGET_ENV +# Prefix to give the final builtin library aliases +# * ALIASES ... +# List of aliases +function(add_libclc_builtin_set) cmake_parse_arguments(ARG "" - "TRIPLE;TARGET_ENV;LIB_DEP;PARENT_TARGET" - "GEN_FILES;FILES;ALIASES;GENERATE_TARGET;COMPILE_OPT;OPT_FLAGS" - ${ARGN}) - - string( TOUPPER "CLC_${ARCH}" CLC_TARGET_DEFINE ) - - list( APPEND ARG_COMPILE_OPT - -D__CLC_INTERNAL - -D${CLC_TARGET_DEFINE} - -I${CMAKE_CURRENT_SOURCE_DIR}/generic/include - # FIXME: Fix libclc to not require disabling this noisy warning - -Wno-bitwise-conditional-parentheses + "ARCH;TRIPLE;ARCH_SUFFIX;TARGET_ENV;PARENT_TARGET" + "LIB_FILES;GEN_FILES;COMPILE_FLAGS;OPT_FLAGS;ALIASES" + ${ARGN} ) + if( NOT ARG_ARCH OR NOT ARG_ARCH_SUFFIX OR NOT ARG_TRIPLE ) + message( FATAL_ERROR "Must provide ARCH, ARCH_SUFFIX, and TRIPLE" ) + endif() + set( bytecode_files "" ) - foreach( file IN LISTS ARG_GEN_FILES ARG_FILES ) + foreach( file IN LISTS ARG_GEN_FILES ARG_LIB_FILES ) # We need to take each file and produce an absolute input file, as well # as a unique architecture-specific output file. We deal with a mix of # different input files, which makes this trickier. @@ -326,18 +335,19 @@ macro(add_libclc_builtin_set arch_suffix) INPUT ${input_file} OUTPUT ${output_file} EXTRA_OPTS -fno-builtin -nostdlib - "${ARG_COMPILE_OPT}" -I${CMAKE_CURRENT_SOURCE_DIR}/${file_dir} + "${ARG_COMPILE_FLAGS}" -I${CMAKE_CURRENT_SOURCE_DIR}/${file_dir} DEPENDENCIES generate_convert.cl clspv-generate_convert.cl ) - list(APPEND bytecode_files ${output_file}) + list( APPEND bytecode_files ${output_file} ) endforeach() - set( builtins_comp_lib_tgt builtins.comp.${arch_suffix} ) + set( builtins_comp_lib_tgt builtins.comp.${ARG_ARCH_SUFFIX} ) add_custom_target( ${builtins_comp_lib_tgt} DEPENDS ${bytecode_files} ) + set_target_properties( ${builtins_comp_lib_tgt} PROPERTIES FOLDER "libclc/Device IR/Comp" ) - set( builtins_link_lib_tgt builtins.link.${arch_suffix} ) + set( builtins_link_lib_tgt builtins.link.${ARG_ARCH_SUFFIX} ) link_bc( TARGET ${builtins_link_lib_tgt} INPUTS ${bytecode_files} @@ -351,9 +361,23 @@ macro(add_libclc_builtin_set arch_suffix) COMMAND ${CMAKE_COMMAND} -E make_directory ${LIBCLC_LIBRARY_OUTPUT_INTDIR} DEPENDS ${builtins_link_lib} prepare_builtins ) - set( builtins_opt_lib_tgt builtins.opt.${arch_suffix} ) + if( ARG_ARCH STREQUAL spirv OR ARG_ARCH STREQUAL spirv64 ) + set( spv_suffix ${ARG_ARCH_SUFFIX}.spv ) + add_custom_command( OUTPUT ${spv_suffix} + COMMAND ${llvm-spirv_exe} ${spvflags} -o ${spv_suffix} ${builtins_link_lib} + DEPENDS ${llvm-spirv_target} ${builtins_link_lib} ${builtins_link_lib_tgt} + ) + add_custom_target( "prepare-${spv_suffix}" ALL DEPENDS "${spv_suffix}" ) + set_target_properties( "prepare-${spv_suffix}" PROPERTIES FOLDER "libclc/Device IR/Prepare" ) + install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${spv_suffix} + DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" ) + + return() + endif() + + set( builtins_opt_lib_tgt builtins.opt.${ARG_ARCH_SUFFIX} ) - process_bc(${arch_suffix}.bc + process_bc(${ARG_ARCH_SUFFIX}.bc LIB_TGT ${builtins_opt_lib_tgt} IN_FILE ${builtins_link_lib} OUT_DIR ${LIBCLC_LIBRARY_OUTPUT_INTDIR} @@ -362,7 +386,7 @@ macro(add_libclc_builtin_set arch_suffix) # Add dependency to top-level pseudo target to ease making other # targets dependent on libclc. - set( obj_suffix ${arch_suffix}.bc ) + set( obj_suffix ${ARG_ARCH_SUFFIX}.bc ) add_dependencies(${ARG_PARENT_TARGET} prepare-${obj_suffix}) set( builtins_lib $ ) @@ -436,7 +460,7 @@ macro(add_libclc_builtin_set arch_suffix) endif() # nvptx-- targets don't include workitem builtins - if( NOT ${t} MATCHES ".*ptx.*--$" ) + if( NOT ARG_TRIPLE MATCHES ".*ptx.*--$" ) add_test( NAME external-calls-${obj_suffix} COMMAND ./check_external_calls.sh ${builtins-lib} WORKING_DIRECTORY ${LIBCLC_LIBRARY_OUTPUT_INTDIR} ) @@ -445,26 +469,50 @@ macro(add_libclc_builtin_set arch_suffix) endif() foreach( a ${$ARG_ALIASES} ) - set( alias_suffix "${ARG_TARGET_ENV}-${a}-${ARG_TRIPLE}.bc" ) + set( alias_suffix "${ARG_TARGET_ENV}${a}-${ARG_TRIPLE}.bc" ) add_libclc_alias( ${alias_suffix} ${arch_suffix} PARENT_TARGET ${ARG_PARENT_TARGET}) endforeach( a ) -endmacro(add_libclc_builtin_set arch_suffix) +endfunction(add_libclc_builtin_set) -function(libclc_configure_lib_source OUT_LIST OUT_GEN_LIST) +# Produces a list of libclc source files by walking over SOURCES files in a +# given directory. Outputs the list of files in LIB_FILE_LIST. +# +# LIB_FILE_LIST may be pre-populated and is appended to. +# +# Arguments: +# * LIB_ROOT_DIR +# Root directory containing target's lib files, relative to libclc root +# directory. If not provided, is set to '.'. +# * LIB_DIR +# Name of the directory containing the target's lib files. If not provided, +# is set to 'lib'. +# * DIRS ... +# List of directories under LIB_ROOT_DIR to walk over searching for SOURCES +# files +function(libclc_configure_lib_source LIB_FILE_LIST) cmake_parse_arguments(ARG "" - "LIB_DIR" - "DIRS;DEPS" - ${ARGN}) + "LIB_DIR;LIB_ROOT_DIR" + "DIRS" + ${ARGN} + ) + + if( NOT ARG_LIB_ROOT_DIR ) + set(ARG_LIB_ROOT_DIR ".") + endif() + + if( NOT ARG_LIB_DIR ) + set(ARG_LIB_DIR "lib") + endif() # Enumerate SOURCES* files set( source_list ) foreach( l ${ARG_DIRS} ) foreach( s "SOURCES" "SOURCES_${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}" ) - file( TO_CMAKE_PATH ${l}/${ARG_LIB_DIR}/${s} file_loc ) + file( TO_CMAKE_PATH ${ARG_LIB_ROOT_DIR}/${l}/${ARG_LIB_DIR}/${s} file_loc ) file( TO_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${file_loc} loc ) # Prepend the location to give higher priority to # specialized implementation @@ -476,15 +524,8 @@ function(libclc_configure_lib_source OUT_LIST OUT_GEN_LIST) ## Add the generated convert files here to prevent adding the ones listed in ## SOURCES - set( objects ${ARG_DEPS} ) # A "set" of already-added input files - set( rel_files ) # Source directory input files, relative to the root dir - set( gen_files ${ARG_DEPS} ) # Generated binary input files, relative to the binary dir - - if( NOT ENABLE_RUNTIME_SUBNORMAL ) - if( EXISTS generic/${ARG_LIB_DIR}/subnormal_use_default.ll ) - list( APPEND rel_files generic/${ARG_LIB_DIR}/subnormal_use_default.ll ) - endif() - endif() + set( rel_files ${${LIB_FILE_LIST}} ) # Source directory input files, relative to the root dir + set( objects ${${LIB_FILE_LIST}} ) # A "set" of already-added input files foreach( l ${source_list} ) file( READ ${l} file_list ) @@ -499,7 +540,5 @@ function(libclc_configure_lib_source OUT_LIST OUT_GEN_LIST) endforeach() endforeach() - set( ${OUT_LIST} ${rel_files} PARENT_SCOPE ) - set( ${OUT_GEN_LIST} ${gen_files} PARENT_SCOPE ) - -endfunction(libclc_configure_lib_source OUT_LIST OUT_GEN_LIST) + set( ${LIB_FILE_LIST} ${rel_files} PARENT_SCOPE ) +endfunction(libclc_configure_lib_source LIB_FILE_LIST) diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES index 0a7558459be24..080acc2249654 100644 --- a/libclc/generic/lib/SOURCES +++ b/libclc/generic/lib/SOURCES @@ -46,7 +46,6 @@ cl_khr_int64_extended_atomics/atom_max.cl cl_khr_int64_extended_atomics/atom_min.cl cl_khr_int64_extended_atomics/atom_or.cl cl_khr_int64_extended_atomics/atom_xor.cl -convert-clc.cl common/degrees.cl common/mix.cl common/radians.cl