From ad04284c71015337915abfe7e845d1d479cdc001 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Thu, 27 Mar 2025 19:50:16 +0100 Subject: [PATCH] [SYCL] Switch to use plain array in `sycl::vec` in more cases (#17656) The problem with using `std::array` in `sycl::vec` is that we cannot compile it in some environments (namely, Windows) because the former may use something that is illegal in SYCL device code. intel/llvm#17025 fixed that, but only did so under preview breaking changes mode, which does not satisfy some of our customers immediately. This PR introduces two main changes: - it allows to opt-in for new behavior through passing `-D__SYCL_USE_NEW_VEC_IMPL=1` macro without using `-fpreview-breaking-changes` flag. That allows for a more gradual opt-in from customers who are interested in this fix - it switches the imlpementation to use the new approach with C-style arrays if their size and alignment is the same as for the corresponding `std::array` - in that case their memory layout is expected to be absolutely the same and therefore it should be safe to use the new approach without fear of some ABI incompatibilities. This allows for customers to benefit from the fix without specifying any extra macro (which should be the case for the most common platforms out there) --- sycl/include/sycl/detail/vector_convert.hpp | 3 +- sycl/include/sycl/vector.hpp | 43 ++++++++- sycl/test/abi/layout_vec.cpp | 6 +- sycl/test/basic_tests/vectors/storage.cpp | 43 +++++++++ .../vector/vector_bf16_builtins.cpp | 10 +-- .../vector/vector_convert_bfloat.cpp | 8 +- .../vector/vector_math_ops.cpp | 90 +++++++++---------- sycl/test/regression/vec_array_windows.cpp | 23 +++++ 8 files changed, 165 insertions(+), 61 deletions(-) create mode 100644 sycl/test/basic_tests/vectors/storage.cpp create mode 100644 sycl/test/regression/vec_array_windows.cpp diff --git a/sycl/include/sycl/detail/vector_convert.hpp b/sycl/include/sycl/detail/vector_convert.hpp index c4bd584be80da..cacc46ecaf0a4 100644 --- a/sycl/include/sycl/detail/vector_convert.hpp +++ b/sycl/include/sycl/detail/vector_convert.hpp @@ -57,6 +57,7 @@ #include // for is_sigeninteger, is_s... #include // for errc +#include #include // bfloat16 #include @@ -941,7 +942,7 @@ vec vec::convert() const { if constexpr (canUseNativeVectorConvert) { auto val = detail::convertImpl(NativeVector); - Result.m_Data = sycl::bit_cast(val); + sycl::detail::memcpy_no_adl(&Result.m_Data, &val, sizeof(Result)); } else #endif // __SYCL_DEVICE_ONLY__ { diff --git a/sycl/include/sycl/vector.hpp b/sycl/include/sycl/vector.hpp index 97d9704c3cc26..27732f7055de8 100644 --- a/sycl/include/sycl/vector.hpp +++ b/sycl/include/sycl/vector.hpp @@ -22,6 +22,15 @@ #endif #endif // __clang__ +// See vec::DataType definitions for more details +#ifndef __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE +#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) +#define __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE 1 +#else +#define __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE 0 +#endif +#endif + #if !defined(__HAS_EXT_VECTOR_TYPE__) && defined(__SYCL_DEVICE_ONLY__) #error "SYCL device compiler is built without ext_vector_type support" #endif @@ -85,6 +94,9 @@ struct elem { }; namespace detail { +// To be defined in tests, trick to access vec's private methods +template class vec_base_test; + template class OperationCurrentT, int... Indexes> class SwizzleOp; @@ -149,7 +161,34 @@ class __SYCL_EBO vec // This represent type of underlying value. There should be only one field // in the class, so vec should be equal to float16 in memory. - using DataType = std::array; + // + // In intel/llvm#14130 we incorrectly used std::array as an underlying storage + // for vec data. The problem with std::array is that it comes from the C++ + // STL headers which we do not control and they may use something that is + // illegal in SYCL device code. One of specific examples is use of debug + // assertions in MSVC's STL implementation. + // + // The better approach is to use plain C++ array, but the problem here is that + // C++ specification does not provide any guarantees about the memory layout + // of std::array and therefore directly switching to it would technically be + // an ABI-break, even though the practical chances of encountering the issue + // are low. + // + // To play it safe, we only switch to use plain array if both its size and + // alignment match those of std::array, or unless the new behavior is forced + // via __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE or preview breaking changes mode. + using DataType = std::conditional_t< +#if __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE + true, +#else + sizeof(std::array) == sizeof(DataT[AdjustedNum]) && + alignof(std::array) == + alignof(DataT[AdjustedNum]), +#endif + DataT[AdjustedNum], std::array>; + + // To allow testing of private methods + template friend class detail::vec_base_test; #ifdef __SYCL_DEVICE_ONLY__ using element_type_for_vector_t = typename detail::map_type< @@ -318,7 +357,7 @@ class __SYCL_EBO vec typename vector_t_ = vector_t, typename = typename std::enable_if_t>> constexpr vec(vector_t_ openclVector) { - m_Data = sycl::bit_cast(openclVector); + sycl::detail::memcpy_no_adl(&m_Data, &openclVector, sizeof(openclVector)); } /* @SYCL2020 diff --git a/sycl/test/abi/layout_vec.cpp b/sycl/test/abi/layout_vec.cpp index ade053f068f30..20ea608a88add 100644 --- a/sycl/test/abi/layout_vec.cpp +++ b/sycl/test/abi/layout_vec.cpp @@ -12,8 +12,7 @@ SYCL_EXTERNAL void foo(sycl::vec) {} // CHECK: 0 | class sycl::vec // ignore empty base classes -// CHECK: 0 | struct std::array m_Data -// CHECK-NEXT: 0 | typename _AT_Type::_Type _M_elems +// CHECK: 0 | DataType m_Data // CHECK-NEXT: | [sizeof=16, dsize=16, align=16, // CHECK-NEXT: | nvsize=16, nvalign=16] @@ -23,7 +22,6 @@ SYCL_EXTERNAL void foo(sycl::vec) {} // CHECK: 0 | class sycl::vec<_Bool, 16> // ignore empty base classes -// CHECK: 0 | struct std::array<_Bool, 16> m_Data -// CHECK-NEXT: 0 | typename _AT_Type::_Type _M_elems +// CHECK: 0 | DataType m_Data // CHECK-NEXT: | [sizeof=16, dsize=16, align=16, // CHECK-NEXT: | nvsize=16, nvalign=16] diff --git a/sycl/test/basic_tests/vectors/storage.cpp b/sycl/test/basic_tests/vectors/storage.cpp new file mode 100644 index 0000000000000..bbb14cfe24227 --- /dev/null +++ b/sycl/test/basic_tests/vectors/storage.cpp @@ -0,0 +1,43 @@ +// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only +// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only -fpreview-breaking-changes +// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only -D__SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE=1 +// expected-no-diagnostics + +#include + +#include + +namespace sycl { +namespace detail { +template class vec_base_test { +public: + static void do_check() { + constexpr bool uses_std_array = + std::is_same_v::DataType, std::array>; + constexpr bool uses_plain_array = + std::is_same_v::DataType, T[N]>; + + constexpr bool std_array_and_plain_array_have_the_same_layout = + sizeof(std::array) == sizeof(T[N]) && + alignof(std::array) == alignof(T[N]); + +#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) || \ + __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE + static_assert(uses_plain_array, + "We must use plain array regardless of " + "layout, because user is opted-in for a potential ABI-break"); +#else + static_assert(std_array_and_plain_array_have_the_same_layout == + uses_plain_array, + "If layouts are the same, we must use safer plain array " + "instead of std::array, or vice versa"); + static_assert( + !std_array_and_plain_array_have_the_same_layout == uses_std_array, + "If layouts are not the same, we must use std::array to preserve ABI"); +#endif + } +}; +} // namespace detail +} // namespace sycl + +int main() { sycl::detail::vec_base_test::do_check(); } diff --git a/sycl/test/check_device_code/vector/vector_bf16_builtins.cpp b/sycl/test/check_device_code/vector/vector_bf16_builtins.cpp index 891c79a1a8f94..5eeb4325181d2 100644 --- a/sycl/test/check_device_code/vector/vector_bf16_builtins.cpp +++ b/sycl/test/check_device_code/vector/vector_bf16_builtins.cpp @@ -69,7 +69,7 @@ SYCL_EXTERNAL auto TestFMin(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestFMaxN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.5") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.4") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.4") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.4") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I12_I:%.*]] = alloca <3 x float>, align 16 // CHECK-NEXT: [[DST_I_I_I_I13_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -123,7 +123,7 @@ SYCL_EXTERNAL auto TestFMax(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z9TestIsNanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.15") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.20") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.12") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.16") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <4 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [4 x float], align 4 @@ -149,7 +149,7 @@ SYCL_EXTERNAL auto TestIsNan(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestFabsN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.38") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.38") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.32") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.32") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32 // CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -185,7 +185,7 @@ SYCL_EXTERNAL auto TestFabs(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestCeilN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.38") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.38") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.32") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.32") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32 // CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -221,7 +221,7 @@ SYCL_EXTERNAL auto TestCeil(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestFMAN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEES5_S5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.48") align 32 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.48") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.48") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.48") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.40") align 32 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.40") align 32 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.40") align 32 [[B:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.40") align 32 [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I14_I:%.*]] = alloca <16 x float>, align 64 // CHECK-NEXT: [[DST_I_I_I_I15_I:%.*]] = alloca [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 diff --git a/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp b/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp index 3166aacc915e3..98d515413587c 100644 --- a/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp +++ b/sycl/test/check_device_code/vector/vector_convert_bfloat.cpp @@ -63,7 +63,7 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestBFtointDeviceRZRN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.5") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.4") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) // CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META19]] @@ -93,7 +93,7 @@ SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z20TestBFtointDeviceRNERN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi1EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.10") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 2 dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.8") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 2 dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(4) [[INP]], align 2, !tbaa [[TBAA11]], !noalias [[META25]] @@ -191,7 +191,7 @@ SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestLLtoBFDeviceRTPRN4sycl3_V13vecIxLi1EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.15") align 2 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.12") align 2 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 8 dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[INP]], align 8, !tbaa [[TBAA46:![0-9]+]], !noalias [[META43]] @@ -204,7 +204,7 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z22TestShorttoBFDeviceRTNRN4sycl3_V13vecIsLi2EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.25") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.20") align 4 [[AGG_RESULT:%.*]], ptr addrspace(4) nocapture noundef readonly align 4 dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(4) [[INP]], align 4, !tbaa [[TBAA11]], !noalias [[META49]] diff --git a/sycl/test/check_device_code/vector/vector_math_ops.cpp b/sycl/test/check_device_code/vector/vector_math_ops.cpp index 9bbd959edb9cd..2b900175dc9da 100644 --- a/sycl/test/check_device_code/vector/vector_math_ops.cpp +++ b/sycl/test/check_device_code/vector/vector_math_ops.cpp @@ -44,7 +44,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIcLi16EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.5") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.5") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.4") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.4") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.4") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA11]], !noalias [[META19]] @@ -57,7 +57,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // std::byte does not support '+'. Therefore, using bitwise XOR as a substitute. // CHECK-LABEL: define dso_local spir_func void @_Z7TestXorN4sycl3_V13vecISt4byteLi8EEES3_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.10") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.10") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.10") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META22:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.8") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.8") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.8") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META22:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 8, !tbaa [[TBAA11]], !noalias [[META23]] @@ -71,7 +71,7 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIbLi4EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable sret(%"class.sycl::_V1::vec.15") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.15") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.15") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !srcloc [[META26:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable sret(%"class.sycl::_V1::vec.12") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.12") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.12") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !srcloc [[META26:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA11]], !noalias [[META27]] @@ -84,11 +84,11 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 4 // CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILPLIBEENS0_3VECIBLI4EEERKS4_S6__EXIT:%.*]] // CHECK: for.body.i.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[ARRAYIDX_I_I_I_I_I]], align 1, !tbaa [[TBAA11]], !alias.scope [[META27]] +// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds [4 x i8], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I_I]] +// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 1, !tbaa [[TBAA11]], !alias.scope [[META27]] // CHECK-NEXT: [[CMP3_I_I:%.*]] = icmp ne i8 [[TMP2]], 0 // CHECK-NEXT: [[FROMBOOL_I_I:%.*]] = zext i1 [[CMP3_I_I]] to i8 -// CHECK-NEXT: store i8 [[FROMBOOL_I_I]], ptr addrspace(4) [[ARRAYIDX_I_I_I_I_I]], align 1, !tbaa [[TBAA30:![0-9]+]], !alias.scope [[META27]] +// CHECK-NEXT: store i8 [[FROMBOOL_I_I]], ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 1, !tbaa [[TBAA30:![0-9]+]], !alias.scope [[META27]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK: _ZN4sycl3_V16detailplIbEENS0_3vecIbLi4EEERKS4_S6_.exit: @@ -97,7 +97,7 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_6detail9half_impl4halfELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.20") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.20") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.20") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META34:![0-9]+]] !sycl_used_aspects [[META35:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.16") align 8 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.16") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.16") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META34:![0-9]+]] !sycl_used_aspects [[META35:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]]) // CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x half>, ptr [[A]], align 8, !noalias [[META37]] @@ -110,7 +110,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.25") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.25") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.25") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] !srcloc [[META40:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.20") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.20") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.20") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] !srcloc [[META40:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I_I:%.*]] = alloca float, align 4 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) @@ -123,17 +123,17 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 3 // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V16DETAILPLINS0_3EXT6ONEAPI8BFLOAT16EEENS0_3VECIS5_LI3EEERKS7_S9__EXIT:%.*]] // CHECK: for.body.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: [[ARRAYIDX_I_I_I10_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I]] +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] +// CHECK-NEXT: [[ARRAYIDX_I10_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I]]), !noalias [[META41]] -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I]]) #[[ATTR8:[0-9]+]], !noalias [[META44:![0-9]+]] -// CHECK-NEXT: [[CALL_I_I2_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I10_I]]) #[[ATTR8]], !noalias [[META44]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I]]) #[[ATTR8:[0-9]+]], !noalias [[META44:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I2_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I10_I]]) #[[ATTR8]], !noalias [[META44]] // CHECK-NEXT: [[ADD_I_I:%.*]] = fadd float [[CALL_I_I_I_I]], [[CALL_I_I2_I_I]] // CHECK-NEXT: store float [[ADD_I_I]], ptr [[REF_TMP_I_I]], align 4, !tbaa [[TBAA47:![0-9]+]], !noalias [[META44]] // CHECK-NEXT: [[CALL_I_I3_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I]]) #[[ATTR8]], !noalias [[META44]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I]]), !noalias [[META41]] -// CHECK-NEXT: [[ARRAYIDX_I_I_I12_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: store i16 [[CALL_I_I3_I_I]], ptr addrspace(4) [[ARRAYIDX_I_I_I12_I]], align 2, !tbaa [[TBAA49:![0-9]+]], !alias.scope [[META41]] +// CHECK-NEXT: [[ARRAYIDX_I12_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] +// CHECK-NEXT: store i16 [[CALL_I_I3_I_I]], ptr addrspace(4) [[ARRAYIDX_I12_I]], align 2, !tbaa [[TBAA49:![0-9]+]], !alias.scope [[META41]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK: _ZN4sycl3_V16detailplINS0_3ext6oneapi8bfloat16EEENS0_3vecIS5_Li3EEERKS7_S9_.exit: @@ -147,7 +147,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, /***************** Binary Logical Ops *******************/ // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIiLi16EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.30") align 64 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.30") align 64 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.30") align 64 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META52:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.24") align 64 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.24") align 64 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.24") align 64 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META52:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META53:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A]], align 64, !tbaa [[TBAA11]], !noalias [[META53]] @@ -162,7 +162,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func noundef range(i8 -1, 1) <3 x i8> @_Z15TestGreaterThanN4sycl3_V13vecISt4byteLi3EEES3_( -// CHECK-SAME: ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.35") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.35") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] !srcloc [[META56:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.28") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.28") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] !srcloc [[META56:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i8>, ptr [[A]], align 1 // CHECK-NEXT: [[LOADVEC4_I_I2:%.*]] = load <4 x i8>, ptr [[B]], align 1 @@ -176,7 +176,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIbLi2EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.40") align 2 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.45") align 2 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.45") align 2 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META57:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.32") align 2 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.36") align 2 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.36") align 2 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META57:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr [[A]], align 2, !tbaa [[TBAA11]], !noalias [[META58]] @@ -191,7 +191,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.50") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.55") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.55") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META61:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.40") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.44") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.44") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META61:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA11]], !noalias [[META62]] @@ -206,7 +206,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.60") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.65") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.65") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META65:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.48") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.52") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.52") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META65:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(4) @@ -218,14 +218,14 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { // CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V16DETAILGTINS0_3EXT6ONEAPI8BFLOAT16EEENS0_3VECISLI4EEERKNS6_IS5_LI4EEESA__EXIT:%.*]] // CHECK: for.body.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: [[ARRAYIDX_I_I_I13_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I]]) #[[ATTR8]], !noalias [[META66]] -// CHECK-NEXT: [[CALL_I_I2_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I13_I]]) #[[ATTR8]], !noalias [[META66]] +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] +// CHECK-NEXT: [[ARRAYIDX_I13_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I]]) #[[ATTR8]], !noalias [[META66]] +// CHECK-NEXT: [[CALL_I_I2_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I13_I]]) #[[ATTR8]], !noalias [[META66]] // CHECK-NEXT: [[CMP_I_I:%.*]] = fcmp ogt float [[CALL_I_I_I_I]], [[CALL_I_I2_I_I]] // CHECK-NEXT: [[CONV5_I:%.*]] = sext i1 [[CMP_I_I]] to i16 -// CHECK-NEXT: [[ARRAYIDX_I_I_I15_I:%.*]] = getelementptr inbounds nuw [4 x i16], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: store i16 [[CONV5_I]], ptr addrspace(4) [[ARRAYIDX_I_I_I15_I]], align 2, !tbaa [[TBAA49]], !alias.scope [[META66]] +// CHECK-NEXT: [[ARRAYIDX_I15_I:%.*]] = getelementptr inbounds [4 x i16], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] +// CHECK-NEXT: store i16 [[CONV5_I]], ptr addrspace(4) [[ARRAYIDX_I15_I]], align 2, !tbaa [[TBAA49]], !alias.scope [[META66]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK: _ZN4sycl3_V16detailgtINS0_3ext6oneapi8bfloat16EEENS0_3vecIsLi4EEERKNS6_IS5_Li4EEESA_.exit: @@ -239,7 +239,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, /********************** Unary Ops **********************/ // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIiLi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.69") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.69") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.56") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.56") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META71:![0-9]+]]) // CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i32>, ptr [[A]], align 16, !noalias [[META71]] @@ -247,13 +247,13 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <3 x i32> [[EXTRACTVEC_I_I]], zeroinitializer // CHECK-NEXT: [[SEXT_I:%.*]] = sext <3 x i1> [[CMP_I]] to <3 x i32> // CHECK-NEXT: [[EXTRACTVEC_I2_I:%.*]] = shufflevector <3 x i32> [[SEXT_I]], <3 x i32> poison, <4 x i32> -// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I2_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA11]], !alias.scope [[META71]] +// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I2_I]], ptr addrspace(4) [[AGG_RESULT]], align 1, !alias.scope [[META71]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecIiLi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.74") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.74") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META74:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.60") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.60") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META74:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META75:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 16, !tbaa [[TBAA11]], !noalias [[META75]] @@ -265,42 +265,42 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // Negation is not valid for std::byte. Therefore, using bitwise negation. // CHECK-LABEL: define dso_local spir_func void @_Z19TestBitwiseNegationN4sycl3_V13vecISt4byteLi16EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.78") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.78") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META78:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.64") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.64") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META78:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META79:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA11]], !noalias [[META79]] // CHECK-NEXT: [[NOT_I:%.*]] = xor <16 x i8> [[TMP0]], -// CHECK-NEXT: store <16 x i8> [[NOT_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA11]], !alias.scope [[META79]] +// CHECK-NEXT: store <16 x i8> [[NOT_I]], ptr addrspace(4) [[AGG_RESULT]], align 1, !alias.scope [[META79]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBitwiseNegation(vec a) { return ~a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIbLi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.83") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.15") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META82:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.68") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.12") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META82:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META83:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA11]], !noalias [[META83]] // CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i8> [[TMP0]], zeroinitializer // CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i8> -// CHECK-NEXT: store <4 x i8> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA11]], !alias.scope [[META83]] +// CHECK-NEXT: store <4 x i8> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 1, !alias.scope [[META83]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_6detail9half_impl4halfELi2EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.88") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.93") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META86:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.72") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.76") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META86:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META87:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A]], align 4, !tbaa [[TBAA11]], !noalias [[META87]] // CHECK-NEXT: [[CMP_I:%.*]] = fcmp oeq <2 x half> [[TMP0]], zeroinitializer // CHECK-NEXT: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i16> -// CHECK-NEXT: store <2 x i16> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA11]], !alias.scope [[META87]] +// CHECK-NEXT: store <2 x i16> [[SEXT_I]], ptr addrspace(4) [[AGG_RESULT]], align 1, !alias.scope [[META87]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.55") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.55") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META90:![0-9]+]] !sycl_used_aspects [[META35]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.44") align 16 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.44") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META90:![0-9]+]] !sycl_used_aspects [[META35]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META91:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA11]], !noalias [[META91]] @@ -311,7 +311,7 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.98") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.25") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META94:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.80") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.20") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META94:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META95:![0-9]+]]) @@ -322,12 +322,12 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 3 // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V16DETAILNTERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI3EEE_EXIT:%.*]] // CHECK: for.body.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: [[CALL_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I]]) #[[ATTR8]], !noalias [[META95]] +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] +// CHECK-NEXT: [[CALL_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I]]) #[[ATTR8]], !noalias [[META95]] // CHECK-NEXT: [[TOBOOL_I:%.*]] = fcmp oeq float [[CALL_I_I_I]], 0.000000e+00 // CHECK-NEXT: [[CONV3_I:%.*]] = sext i1 [[TOBOOL_I]] to i16 -// CHECK-NEXT: [[ARRAYIDX_I_I_I10_I:%.*]] = getelementptr inbounds nuw [4 x i16], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: store i16 [[CONV3_I]], ptr addrspace(4) [[ARRAYIDX_I_I_I10_I]], align 2, !tbaa [[TBAA49]], !alias.scope [[META95]] +// CHECK-NEXT: [[ARRAYIDX_I10_I:%.*]] = getelementptr inbounds [4 x i16], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] +// CHECK-NEXT: store i16 [[CONV3_I]], ptr addrspace(4) [[ARRAYIDX_I10_I]], align 2, !tbaa [[TBAA49]], !alias.scope [[META95]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP98:![0-9]+]] // CHECK: _ZN4sycl3_V16detailntERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi3EEE.exit: @@ -336,7 +336,7 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.102") align 32 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.102") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META99:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.84") align 32 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.84") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META99:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca float, align 4 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) @@ -349,15 +349,15 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 16 // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V16DETAILNGERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI16EEE_EXIT:%.*]] // CHECK: for.body.i: -// CHECK-NEXT: [[ARRAYIDX_I_I_I_I:%.*]] = getelementptr inbounds nuw [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I]]) -// CHECK-NEXT: [[CALL_I_I_I:%.*]] = call spir_func float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I_I]]) #[[ATTR8]], !noalias [[META103:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I_I:%.*]] = call spir_func float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I]]) #[[ATTR8]], !noalias [[META103:![0-9]+]] // CHECK-NEXT: [[FNEG_I:%.*]] = fneg float [[CALL_I_I_I]] // CHECK-NEXT: store float [[FNEG_I]], ptr [[REF_TMP_I]], align 4, !tbaa [[TBAA47]], !noalias [[META103]] // CHECK-NEXT: [[CALL_I_I10_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I]]) #[[ATTR8]], !noalias [[META103]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I]]) -// CHECK-NEXT: [[ARRAYIDX_I_I_I9_I:%.*]] = getelementptr inbounds nuw [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] -// CHECK-NEXT: store i16 [[CALL_I_I10_I]], ptr addrspace(4) [[ARRAYIDX_I_I_I9_I]], align 2, !tbaa [[TBAA103:![0-9]+]], !alias.scope [[META100]] +// CHECK-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[AGG_RESULT]], i64 0, i64 [[I_0_I]] +// CHECK-NEXT: store i16 [[CALL_I_I10_I]], ptr addrspace(4) [[ARRAYIDX_I9_I]], align 2, !tbaa [[TBAA103:![0-9]+]], !alias.scope [[META100]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP105:![0-9]+]] // CHECK: _ZN4sycl3_V16detailngERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi16EEE.exit: diff --git a/sycl/test/regression/vec_array_windows.cpp b/sycl/test/regression/vec_array_windows.cpp new file mode 100644 index 0000000000000..d9511de372f54 --- /dev/null +++ b/sycl/test/regression/vec_array_windows.cpp @@ -0,0 +1,23 @@ +// Test to isolate sycl::vec regression after +// https://github.com/intel/llvm/pull/14130. This PR caused sycl::vec to use +// std::array as its underlying storage. However, operations on std::array +// may emit debug-mode-only functions, on which the device compiler may fail. + +// REQUIRES: windows + +// RUN: %clangxx -fsycl -D_DEBUG %s -fsycl-device-only -Xclang -verify %s -Xclang -verify-ignore-unexpected=note,warning +// RUN: %if preview-breaking-changes-supported %{ %clangxx -fsycl -fpreview-breaking-changes -D_DEBUG -fsycl-device-only %s %} + +#include + +// expected-no-diagnostics +// +// Our current implementation automatically opts-in for a new implementation if +// that is possible without breaking ABI. +// However, depending on the environment (used STL implementation, in +// particular) it may not be the case. Therefore, the lines below are kept for +// reference of how an error would look like in a problematic environment. +// not-expected-error@* {{SYCL kernel cannot call a variadic function}} +// not-expected-error@* {{SYCL kernel cannot call an undefined function without SYCL_EXTERNAL attribute}} +// not-expected-error@* {{SYCL kernel cannot call an undefined function without SYCL_EXTERNAL attribute}} +SYCL_EXTERNAL auto GetFirstElement(sycl::vec v) { return v[0]; }