Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,6 @@

#pragma once

#ifndef __SYCL_USE_NEW_VEC_IMPL
#if defined(__INTEL_PREVIEW_BREAKING_CHANGES)
#define __SYCL_USE_NEW_VEC_IMPL 1
#else
#define __SYCL_USE_NEW_VEC_IMPL 0
#endif
#endif

#include <cstddef>
#include <type_traits>

Expand Down
2 changes: 0 additions & 2 deletions sycl/include/sycl/detail/vector_convert.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,7 @@
#include <sycl/detail/generic_type_traits.hpp> // for is_sigeninteger, is_s...
#include <sycl/exception.hpp> // for errc

#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
#include <sycl/detail/memcpy.hpp>
#endif
#include <sycl/ext/oneapi/bfloat16.hpp> // bfloat16
#include <sycl/vector.hpp>

Expand Down
45 changes: 37 additions & 8 deletions sycl/include/sycl/vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@
#endif
#endif // __clang__

// See vec::DataType definitions for more details
#ifndef __SYCL_USE_NEW_VEC_IMPL
#if defined(__INTEL_PREVIEW_BREAKING_CHANGES)
#define __SYCL_USE_NEW_VEC_IMPL 1
#else
#define __SYCL_USE_NEW_VEC_IMPL 0
#endif
#endif

#if !defined(__HAS_EXT_VECTOR_TYPE__) && defined(__SYCL_DEVICE_ONLY__)
#error "SYCL device compiler is built without ext_vector_type support"
#endif
Expand Down Expand Up @@ -86,6 +95,9 @@ struct elem {
};

namespace detail {
// To be defined in tests, trick to access vec's private methods
template <typename T1, int T2> class vec_base_test;

template <typename VecT, typename OperationLeftT, typename OperationRightT,
template <typename> class OperationCurrentT, int... Indexes>
class SwizzleOp;
Expand Down Expand Up @@ -144,17 +156,34 @@ template <typename DataT, int NumElements> class vec_base {
static constexpr size_t AdjustedNum = (NumElements == 3) ? 4 : NumElements;
// This represent type of underlying value. There should be only one field
// in the class, so vec<float, 16> should be equal to float16 in memory.
#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) && \
defined(__SYCL_USE_NEW_VEC_IMPL)
using DataType = DataT[AdjustedNum];
//
// In intel/llvm#14130 we incorrectly used std::array as an underlying storage
// for vec data. The problem with std::array is that it comes from the C++
// STL headers which we do not control and they may use something that is
// illegal in SYCL device code. One of specific examples is use of debug
// assertions in MSVC's STL implementation.
//
// The better approach is to use plain C++ array, but the problem here is that
// C++ specification does not provide any guarantees about the memory layout
// of std::array and therefore directly switching to it would technically be
// an ABI-break, even though the practical chances of encountering the issue
// are low.
//
// To play it safe, we only switch to use plain array if both its size and
// alignment match those of std::array, or unless the new behavior is forced
// via __SYCL_USE_NEW_VEC_IMPL or preview breaking changes mode.
using DataType = std::conditional_t<
#if __SYCL_USE_NEW_VEC_IMPL
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI, I think @aelovikov-intel is also using this flag for some other (draft?) sycl::vec refactoring work. So, if the user explicitly specifies __SYCL_USE_NEW_VEC_IMPL macro, it might bring in Andrei's changes as well, along with the switch to using C-arrays.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main point here is to have some sort of opt-in mechanism for customers who value building in debug mode on windows using clang.exe (instead of clang-cl.exe) over backwards compatibility or potential ABI issues. So, I'm fine with choosing any other name here, I've just re-used what we had already

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True. Let's use some other macro name

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in 0c1c53d

true,
#else
using DataType = std::array<DataT, AdjustedNum>;
// Assuming that std::array has the same size as the underlying array.
// C++ standard does not guarantee that, but it is true for most popular
// implementations.
static_assert(sizeof(DataType) == sizeof(DataT[AdjustedNum]));
sizeof(std::array<DataT, AdjustedNum>) == sizeof(DataT[AdjustedNum]) &&
alignof(std::array<DataT, AdjustedNum>) ==
alignof(DataT[AdjustedNum]),
#endif
DataT[AdjustedNum], std::array<DataT, AdjustedNum>>;

// To allow testing of private methods
template <typename T1, int T2> friend class detail::vec_base_test;
protected:
// fields
// Alignment is the same as size, to a maximum size of 64. SPEC requires
Expand Down
6 changes: 2 additions & 4 deletions sycl/test/abi/layout_vec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ SYCL_EXTERNAL void foo(sycl::vec<int, 4>) {}

// CHECK: 0 | class sycl::vec<int, 4>
// ignore empty base classes
// CHECK: 0 | struct std::array<int, 4> m_Data
// CHECK-NEXT: 0 | typename {{.+}}::_Type _M_elems
// CHECK: 0 | DataType m_Data
// CHECK-NEXT: | [sizeof=16, dsize=16, align=16,
// CHECK-NEXT: | nvsize=16, nvalign=16]

Expand All @@ -23,7 +22,6 @@ SYCL_EXTERNAL void foo(sycl::vec<bool, 16>) {}

// CHECK: 0 | class sycl::vec<_Bool, 16>
// ignore empty base classes
// CHECK: 0 | struct std::array<_Bool, 16> m_Data
// CHECK-NEXT: 0 | typename {{.+}}::_Type _M_elems
// CHECK: 0 | DataType m_Data
// CHECK-NEXT: | [sizeof=16, dsize=16, align=16,
// CHECK-NEXT: | nvsize=16, nvalign=16]
42 changes: 42 additions & 0 deletions sycl/test/basic_tests/vectors/storage.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only
// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only -fpreview-breaking-changes
// RUN: %clangxx -fsycl -Xclang -verify %s -fsyntax-only -D__SYCL_USE_NEW_VEC_IMPL=1
// expected-no-diagnostics

#include <sycl/vector.hpp>

#include <type_traits>

namespace sycl {
namespace detail {
template <typename T, int N> class vec_base_test {
public:
static void do_check() {
constexpr bool uses_std_array =
std::is_same_v<typename sycl::vec<T, N>::DataType, std::array<T, N>>;
constexpr bool uses_plain_array =
std::is_same_v<typename sycl::vec<T, N>::DataType, T[N]>;

constexpr bool std_array_and_plain_array_have_the_same_layout =
sizeof(std::array<T, N>) == sizeof(T[N]) &&
alignof(std::array<T, N>) == alignof(T[N]);

#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) || __SYCL_USE_NEW_VEC_IMPL
static_assert(uses_plain_array,
"We must use plain array regardless of "
"layout, because user is opted-in for a potential ABI-break");
#else
static_assert(std_array_and_plain_array_have_the_same_layout ==
uses_plain_array,
"If layouts are the same, we must use safer plain array "
"instead of std::array, or vice versa");
static_assert(
!std_array_and_plain_array_have_the_same_layout == uses_std_array,
"If layouts are not the same, we must use std::array to preserve ABI");
#endif
}
};
} // namespace detail
} // namespace sycl

int main() { sycl::detail::vec_base_test<int, 4>::do_check(); }
10 changes: 5 additions & 5 deletions sycl/test/check_device_code/vector/bf16_builtins.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ SYCL_EXTERNAL auto TestFMin(vec<bfloat16, 2> a, vec<bfloat16, 2> b) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z8TestFMaxN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.71") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.71") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.71") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.70") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.70") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.70") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I12_I:%.*]] = alloca <3 x float>, align 16
// CHECK-NEXT: [[DST_I_I_I_I13_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
Expand Down Expand Up @@ -123,7 +123,7 @@ SYCL_EXTERNAL auto TestFMax(vec<bfloat16, 3> a, vec<bfloat16, 3> b) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z9TestIsNanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.149") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.188") align 8 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.146") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.184") align 8 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META38:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <4 x i16>, align 8
// CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [4 x float], align 4
Expand All @@ -149,7 +149,7 @@ SYCL_EXTERNAL auto TestIsNan(vec<bfloat16, 4> a) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z8TestFabsN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.342") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.342") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.336") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.336") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32
// CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
Expand Down Expand Up @@ -185,7 +185,7 @@ SYCL_EXTERNAL auto TestFabs(vec<bfloat16, 8> a) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z8TestCeilN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.342") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.342") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.336") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.336") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32
// CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
Expand Down Expand Up @@ -221,7 +221,7 @@ SYCL_EXTERNAL auto TestCeil(vec<bfloat16, 8> a) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z7TestFMAN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEES5_S5_(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.420") align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.420") align 32 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.420") align 32 captures(none) [[B:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.420") align 32 captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.412") align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[B:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META70:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC_ADDR_I_I_I_I14_I:%.*]] = alloca <16 x float>, align 64
// CHECK-NEXT: [[DST_I_I_I_I15_I:%.*]] = alloca [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2
Expand Down
8 changes: 4 additions & 4 deletions sycl/test/check_device_code/vector/convert_bfloat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec<bfloat16, 3> &inp) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z19TestBFtointDeviceRZRN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.71") align 16 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.70") align 16 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]])
// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META19]]
Expand All @@ -90,7 +90,7 @@ SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec<bfloat16, 3> &inp) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z20TestBFtointDeviceRNERN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi1EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.110") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 2 captures(none) dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.108") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 2 captures(none) dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META24:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]])
// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(4) [[INP]], align 2, !tbaa [[TBAA11]], !noalias [[META25]]
Expand Down Expand Up @@ -182,7 +182,7 @@ SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec<int, 3> &inp) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z19TestLLtoBFDeviceRTPRN4sycl3_V13vecIxLi1EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.149") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.146") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]])
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[INP]], align 8, !tbaa [[TBAA46:![0-9]+]], !noalias [[META43]]
Expand All @@ -195,7 +195,7 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec<long long, 1> &inp) {
}

// CHECK-LABEL: define dso_local spir_func void @_Z22TestShorttoBFDeviceRTNRN4sycl3_V13vecIsLi2EEE(
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.229") align 4 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 4 captures(none) dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.224") align 4 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 4 captures(none) dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]])
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(4) [[INP]], align 4, !tbaa [[TBAA11]], !noalias [[META49]]
Expand Down
Loading
Loading