diff --git a/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp b/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp index afc2a676023b8..ff456ace8816e 100644 --- a/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp +++ b/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp @@ -15,11 +15,7 @@ #ifndef __SYCL_USE_LIBSYCL8_VEC_IMPL #if defined(__INTEL_PREVIEW_BREAKING_CHANGES) -// Several specification changes need to be implemented together to keep CTS -// passing. We'll switch to `0` once they all land. -// `__SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE` needs to be changed to use this -// `__SYCL_USE_LIBSYCL8_VEC_IMPL` at that time as well. -#define __SYCL_USE_LIBSYCL8_VEC_IMPL 1 +#define __SYCL_USE_LIBSYCL8_VEC_IMPL 0 #else #define __SYCL_USE_LIBSYCL8_VEC_IMPL 1 #endif diff --git a/sycl/include/sycl/vector.hpp b/sycl/include/sycl/vector.hpp index 5671dfb514b1d..e9fca44387487 100644 --- a/sycl/include/sycl/vector.hpp +++ b/sycl/include/sycl/vector.hpp @@ -24,11 +24,7 @@ // See vec::DataType definitions for more details #ifndef __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE -#if defined(__INTEL_PREVIEW_BREAKING_CHANGES) -#define __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE 1 -#else -#define __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE 0 -#endif +#define __SYCL_USE_PLAIN_ARRAY_AS_VEC_STORAGE !__SYCL_USE_LIBSYCL8_VEC_IMPL #endif #if !defined(__HAS_EXT_VECTOR_TYPE__) && defined(__SYCL_DEVICE_ONLY__) @@ -309,8 +305,13 @@ template class ConversionToVecMixin { public: operator vec_ty() const { - vec_ty res{*static_cast(this)}; - return res; + auto &self = *static_cast(this); + if constexpr (vec_ty::size() == 1) + // Avoid recursion by explicitly going through `vec(const DataT &)` ctor. + return vec_ty{static_cast(self)}; + else + // Uses `vec`'s variadic ctor. + return vec_ty{self}; } }; @@ -398,9 +399,8 @@ class __SYCL_EBO Swizzle public ApplyIf>>, - public ApplyIf>>, + public ConversionToVecMixin< + Swizzle>, public NamedSwizzlesMixinBoth< Swizzle> { using Base = SwizzleBase>; diff --git a/sycl/test/basic_tests/vectors/assign.cpp b/sycl/test/basic_tests/vectors/assign.cpp index df02b9420d7a4..f284089646419 100644 --- a/sycl/test/basic_tests/vectors/assign.cpp +++ b/sycl/test/basic_tests/vectors/assign.cpp @@ -1,5 +1,4 @@ -// TODO: Remove `__SYCL_USE_LIBSYCL8_VEC_IMPL` once it's auto-set. -// RUN: %clangxx -fsycl -fsyntax-only %s -fpreview-breaking-changes -D__SYCL_USE_LIBSYCL8_VEC_IMPL=0 +// RUN: %clangxx -fsycl -fsyntax-only %s -fpreview-breaking-changes // RUN: %clangxx -fsycl -fsyntax-only %s #include @@ -55,11 +54,7 @@ static_assert( !std::is_assignable_v, sw_double_2>) static_assert( std::is_assignable_v, half>); static_assert( std::is_assignable_v, float>); static_assert( std::is_assignable_v, double>); -#if __SYCL_DEVICE_ONLY__ -static_assert(EXCEPT_IN_PREVIEW std::is_assignable_v, vec>); -#else static_assert(EXCEPT_IN_PREVIEW std::is_assignable_v, vec>); -#endif static_assert( std::is_assignable_v, vec>); static_assert( std::is_assignable_v, vec>); static_assert(EXCEPT_IN_PREVIEW std::is_assignable_v, sw_half_1>); diff --git a/sycl/test/basic_tests/vectors/cxx_conversions.cpp b/sycl/test/basic_tests/vectors/cxx_conversions.cpp index 4973b07cd00db..4bed8d6da86a0 100644 --- a/sycl/test/basic_tests/vectors/cxx_conversions.cpp +++ b/sycl/test/basic_tests/vectors/cxx_conversions.cpp @@ -1,5 +1,4 @@ -// TODO: Remove `__SYCL_USE_LIBSYCL8_VEC_IMPL` once it's auto-set. -// RUN: %clangxx -fsycl -fsyntax-only %s -fpreview-breaking-changes -D__SYCL_USE_LIBSYCL8_VEC_IMPL=0 +// RUN: %clangxx -fsycl -fsyntax-only %s -fpreview-breaking-changes // RUN: %clangxx -fsycl -fsyntax-only %s #include @@ -53,7 +52,7 @@ using sw_double_2 = decltype(std::declval>().swizzle<1, 2>()); static_assert( std::is_invocable_v); static_assert(EXCEPT_IN_PREVIEW std::is_invocable_v); static_assert(EXCEPT_IN_PREVIEW std::is_invocable_v); -static_assert(EXCEPT_IN_PREVIEW std::is_invocable_v); +static_assert( std::is_invocable_v); static_assert(EXCEPT_IN_PREVIEW std::is_invocable_v); static_assert(EXCEPT_IN_PREVIEW std::is_invocable_v); static_assert( std::is_invocable_v>); @@ -64,7 +63,7 @@ static_assert(EXCEPT_IN_PREVIEW std::is_invocable_v) static_assert( std::is_invocable_v); static_assert( std::is_invocable_v); static_assert(EXCEPT_IN_PREVIEW std::is_invocable_v); -static_assert(EXCEPT_IN_PREVIEW std::is_invocable_v); +static_assert( std::is_invocable_v); static_assert(EXCEPT_IN_PREVIEW std::is_invocable_v); static_assert(EXCEPT_IN_PREVIEW std::is_invocable_v>); static_assert( std::is_invocable_v>); diff --git a/sycl/test/check_device_code/vector/bf16_builtins_preview.cpp b/sycl/test/check_device_code/vector/bf16_builtins_preview.cpp index 944c4c566bfa1..c53098f593947 100644 --- a/sycl/test/check_device_code/vector/bf16_builtins_preview.cpp +++ b/sycl/test/check_device_code/vector/bf16_builtins_preview.cpp @@ -31,37 +31,37 @@ using namespace sycl::ext::oneapi::experimental; // CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [2 x float], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[A]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[B]], align 4 -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META11:![0-9]+]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META11]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META10:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META10]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I_I]] to ptr addrspace(4) -// CHECK-NEXT: store <2 x i16> [[TMP0]], ptr [[VEC_ADDR_I_I_I_I_I]], align 4, !tbaa [[TBAA14:![0-9]+]], !noalias [[META11]] -// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec2(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5:[0-9]+]], !noalias [[META11]] -// CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[DST_I_I_I_I_I]], align 4, !tbaa [[TBAA14]], !noalias [[META11]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META11]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META11]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META17:![0-9]+]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META17]] +// CHECK-NEXT: store <2 x i16> [[TMP0]], ptr [[VEC_ADDR_I_I_I_I_I]], align 4, !tbaa [[TBAA13:![0-9]+]], !noalias [[META10]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec2(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5:[0-9]+]], !noalias [[META10]] +// CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[DST_I_I_I_I_I]], align 4, !tbaa [[TBAA13]], !noalias [[META10]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META10]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META10]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META16:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META16]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I4_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I2_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I5_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I3_I]] to ptr addrspace(4) -// CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[VEC_ADDR_I_I_I_I2_I]], align 4, !tbaa [[TBAA14]], !noalias [[META17]] -// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec2(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I4_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I5_I]]) #[[ATTR5]], !noalias [[META17]] -// CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, ptr [[DST_I_I_I_I3_I]], align 4, !tbaa [[TBAA14]], !noalias [[META17]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META17]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META17]] +// CHECK-NEXT: store <2 x i16> [[TMP1]], ptr [[VEC_ADDR_I_I_I_I2_I]], align 4, !tbaa [[TBAA13]], !noalias [[META16]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec2(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I4_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I5_I]]) #[[ATTR5]], !noalias [[META16]] +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, ptr [[DST_I_I_I_I3_I]], align 4, !tbaa [[TBAA13]], !noalias [[META16]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META16]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META16]] // CHECK-NEXT: [[CALL2_I_I:%.*]] = call spir_func noundef <2 x float> @_Z16__spirv_ocl_fminDv2_fS_(<2 x float> noundef [[TMP2]], <2 x float> noundef [[TMP3]]) #[[ATTR6:[0-9]+]] -// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I8_I]]), !noalias [[META23:![0-9]+]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[DST_I_I_I_I9_I]]), !noalias [[META23]] +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I8_I]]), !noalias [[META22:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[DST_I_I_I_I9_I]]), !noalias [[META22]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I10_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I8_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I11_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I9_I]] to ptr addrspace(4) -// CHECK-NEXT: store <2 x float> [[CALL2_I_I]], ptr [[VEC_ADDR_I_I_I_I8_I]], align 8, !tbaa [[TBAA14]], !noalias [[META23]] -// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec2(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I10_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I11_I]]) #[[ATTR5]], !noalias [[META23]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DST_I_I_I_I9_I]], align 2, !tbaa [[TBAA14]], !noalias [[META23]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I8_I]]), !noalias [[META23]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[DST_I_I_I_I9_I]]), !noalias [[META23]] -// CHECK-NEXT: store i32 [[TMP4]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META23]] +// CHECK-NEXT: store <2 x float> [[CALL2_I_I]], ptr [[VEC_ADDR_I_I_I_I8_I]], align 8, !tbaa [[TBAA13]], !noalias [[META22]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec2(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I10_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I11_I]]) #[[ATTR5]], !noalias [[META22]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DST_I_I_I_I9_I]], align 2, !tbaa [[TBAA13]], !noalias [[META22]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I8_I]]), !noalias [[META22]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[DST_I_I_I_I9_I]]), !noalias [[META22]] +// CHECK-NEXT: store i32 [[TMP4]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META22]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFMin(vec a, vec b) { @@ -71,51 +71,51 @@ SYCL_EXTERNAL auto TestFMin(vec a, vec b) { // CHECK-LABEL: define dso_local spir_func void @_Z8TestFMaxN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.70") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.70") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.70") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: [[VEC_ADDR_I_I_I_I12_I:%.*]] = alloca <3 x float>, align 16 -// CHECK-NEXT: [[DST_I_I_I_I13_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 +// CHECK-NEXT: [[VEC_ADDR_I_I_I_I13_I:%.*]] = alloca <3 x float>, align 16 +// CHECK-NEXT: [[DST_I_I_I_I14_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 // CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <3 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [4 x float], align 4 // CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <3 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [4 x float], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[A]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[B]], align 8 -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META28:![0-9]+]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META28]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META26:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META26]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: [[EXTRACTVEC_I_I_I_I_I:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I_I]], align 8, !tbaa [[TBAA14]], !noalias [[META28]] -// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5]], !noalias [[META28]] -// CHECK-NEXT: [[LOADVECN_I_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I_I]], align 4, !noalias [[META28]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META28]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META28]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META31:![0-9]+]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META31]] +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I_I]], align 8, !tbaa [[TBAA13]], !noalias [[META26]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5]], !noalias [[META26]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I_I]], align 4, !noalias [[META26]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META26]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META26]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META29:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META29]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I5_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I2_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I6_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I3_I]] to ptr addrspace(4) // CHECK-NEXT: [[EXTRACTVEC_I_I_I_I7_I:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I7_I]], ptr [[VEC_ADDR_I_I_I_I2_I]], align 8, !tbaa [[TBAA14]], !noalias [[META31]] -// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I5_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I6_I]]) #[[ATTR5]], !noalias [[META31]] -// CHECK-NEXT: [[LOADVECN_I_I_I_I_I8_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I3_I]], align 4, !noalias [[META31]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META31]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META31]] +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I7_I]], ptr [[VEC_ADDR_I_I_I_I2_I]], align 8, !tbaa [[TBAA13]], !noalias [[META29]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I5_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I6_I]]) #[[ATTR5]], !noalias [[META29]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I8_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I3_I]], align 4, !noalias [[META29]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META29]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META29]] // CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I_I]], <4 x float> poison, <3 x i32> // CHECK-NEXT: [[EXTRACTVEC_I_I4_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I8_I]], <4 x float> poison, <3 x i32> // CHECK-NEXT: [[CALL2_I_I:%.*]] = call spir_func noundef <3 x float> @_Z16__spirv_ocl_fmaxDv3_fS_(<3 x float> noundef [[EXTRACTVEC_I_I_I_I]], <3 x float> noundef [[EXTRACTVEC_I_I4_I_I]]) #[[ATTR6]] -// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META34:![0-9]+]]) -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I12_I]]), !noalias [[META37:![0-9]+]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[DST_I_I_I_I13_I]]), !noalias [[META37]] -// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I15_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I12_I]] to ptr addrspace(4) -// CHECK-NEXT: [[DST_ASCAST_I_I_I_I16_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I13_I]] to ptr addrspace(4) -// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I17_I:%.*]] = shufflevector <3 x float> [[CALL2_I_I]], <3 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I_I_I17_I]], ptr [[VEC_ADDR_I_I_I_I12_I]], align 16, !tbaa [[TBAA14]], !noalias [[META37]] -// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I15_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I16_I]]) #[[ATTR5]], !noalias [[META37]] -// CHECK-NEXT: [[LOADVECN_I_I_I_I_I18_I:%.*]] = load <4 x i16>, ptr [[DST_I_I_I_I13_I]], align 2, !noalias [[META37]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I12_I]]), !noalias [[META37]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I13_I]]), !noalias [[META37]] -// CHECK-NEXT: [[EXTRACTVEC_I19_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I_I_I_I18_I]], <4 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I19_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META37]] +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META32:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I13_I]]), !noalias [[META35:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[DST_I_I_I_I14_I]]), !noalias [[META35]] +// CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I16_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I13_I]] to ptr addrspace(4) +// CHECK-NEXT: [[DST_ASCAST_I_I_I_I17_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I14_I]] to ptr addrspace(4) +// CHECK-NEXT: [[EXTRACTVEC_I_I_I_I18_I:%.*]] = shufflevector <3 x float> [[CALL2_I_I]], <3 x float> poison, <4 x i32> +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I_I_I18_I]], ptr [[VEC_ADDR_I_I_I_I13_I]], align 16, !tbaa [[TBAA13]], !noalias [[META35]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I16_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I17_I]]) #[[ATTR5]], !noalias [[META35]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I19_I:%.*]] = load <4 x i16>, ptr [[DST_I_I_I_I14_I]], align 2, !noalias [[META35]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I13_I]]), !noalias [[META35]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I14_I]]), !noalias [[META35]] +// CHECK-NEXT: [[EXTRACTVEC_I20_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I_I_I_I19_I]], <4 x i16> poison, <4 x i32> +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I20_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META35]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFMax(vec a, vec b) { @@ -123,25 +123,25 @@ SYCL_EXTERNAL auto TestFMax(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z9TestIsNanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.146") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.184") align 8 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.146") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.183") align 8 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <4 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [4 x float], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[A]], align 8 -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META39:![0-9]+]]) -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META42:![0-9]+]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META42]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META36:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META39:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META39]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I_I]] to ptr addrspace(4) -// CHECK-NEXT: store <4 x i16> [[TMP0]], ptr [[VEC_ADDR_I_I_I_I_I]], align 8, !tbaa [[TBAA14]], !noalias [[META42]] -// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec4(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5]], !noalias [[META42]] -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I_I]], align 4, !tbaa [[TBAA14]], !noalias [[META42]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META42]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META42]] +// CHECK-NEXT: store <4 x i16> [[TMP0]], ptr [[VEC_ADDR_I_I_I_I_I]], align 8, !tbaa [[TBAA13]], !noalias [[META39]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec4(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5]], !noalias [[META39]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I_I]], align 4, !tbaa [[TBAA13]], !noalias [[META39]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META39]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META39]] // CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef <4 x i8> @_Z13__spirv_IsNanDv4_f(<4 x float> noundef [[TMP1]]) #[[ATTR6]] // CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef <4 x i32> @_Z22__spirv_SConvert_Rint4Dv4_a(<4 x i8> noundef [[CALL_I_I_I_I]]) #[[ATTR6]] // CHECK-NEXT: [[CALL_I_I_I2_I:%.*]] = call spir_func noundef <4 x i16> @_Z24__spirv_SConvert_Rshort4Dv4_i(<4 x i32> noundef [[CALL_I_I_I_I_I_I]]) #[[ATTR6]] -// CHECK-NEXT: store <4 x i16> [[CALL_I_I_I2_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META45:![0-9]+]] +// CHECK-NEXT: store <4 x i16> [[CALL_I_I_I2_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META42:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestIsNan(vec a) { @@ -149,35 +149,35 @@ SYCL_EXTERNAL auto TestIsNan(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestFabsN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.336") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.336") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.335") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.335") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32 // CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 // CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <8 x i16>, align 16 // CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [8 x float], align 4 // CHECK-NEXT: [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <8 x i16>, ptr [[A]], align 16 -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META52:![0-9]+]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META52]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META48:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META48]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I_I]] to ptr addrspace(4) -// CHECK-NEXT: store <8 x i16> [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD]], ptr [[VEC_ADDR_I_I_I_I_I]], align 16, !tbaa [[TBAA14]], !noalias [[META52]] -// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec8(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5]], !noalias [[META52]] -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[DST_I_I_I_I_I]], align 4, !tbaa [[TBAA14]], !noalias [[META52]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META52]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META52]] +// CHECK-NEXT: store <8 x i16> [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD]], ptr [[VEC_ADDR_I_I_I_I_I]], align 16, !tbaa [[TBAA13]], !noalias [[META48]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec8(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5]], !noalias [[META48]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[DST_I_I_I_I_I]], align 4, !tbaa [[TBAA13]], !noalias [[META48]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META48]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META48]] // CHECK-NEXT: [[CALL1_I_I:%.*]] = call spir_func noundef <8 x float> @_Z16__spirv_ocl_fabsDv8_f(<8 x float> noundef [[TMP0]]) #[[ATTR6]] -// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META58:![0-9]+]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META58]] +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META51:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META54:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META54]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I4_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I2_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I5_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I3_I]] to ptr addrspace(4) -// CHECK-NEXT: store <8 x float> [[CALL1_I_I]], ptr [[VEC_ADDR_I_I_I_I2_I]], align 32, !tbaa [[TBAA14]], !noalias [[META58]] -// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec8(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I4_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I5_I]]) #[[ATTR5]], !noalias [[META58]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[DST_I_I_I_I3_I]], align 2, !tbaa [[TBAA14]], !noalias [[META58]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META58]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META58]] -// CHECK-NEXT: store <8 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META58]] +// CHECK-NEXT: store <8 x float> [[CALL1_I_I]], ptr [[VEC_ADDR_I_I_I_I2_I]], align 32, !tbaa [[TBAA13]], !noalias [[META54]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec8(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I4_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I5_I]]) #[[ATTR5]], !noalias [[META54]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[DST_I_I_I_I3_I]], align 2, !tbaa [[TBAA13]], !noalias [[META54]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META54]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META54]] +// CHECK-NEXT: store <8 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META54]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFabs(vec a) { @@ -185,35 +185,35 @@ SYCL_EXTERNAL auto TestFabs(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestCeilN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.336") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.336") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.335") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.335") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32 // CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 // CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <8 x i16>, align 16 // CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [8 x float], align 4 // CHECK-NEXT: [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <8 x i16>, ptr [[A]], align 16 -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]]) -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META63:![0-9]+]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META63]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META58:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META58]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I_I]] to ptr addrspace(4) -// CHECK-NEXT: store <8 x i16> [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD]], ptr [[VEC_ADDR_I_I_I_I_I]], align 16, !tbaa [[TBAA14]], !noalias [[META63]] -// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec8(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5]], !noalias [[META63]] -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[DST_I_I_I_I_I]], align 4, !tbaa [[TBAA14]], !noalias [[META63]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META63]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META63]] +// CHECK-NEXT: store <8 x i16> [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD]], ptr [[VEC_ADDR_I_I_I_I_I]], align 16, !tbaa [[TBAA13]], !noalias [[META58]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec8(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5]], !noalias [[META58]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[DST_I_I_I_I_I]], align 4, !tbaa [[TBAA13]], !noalias [[META58]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META58]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META58]] // CHECK-NEXT: [[CALL1_I_I:%.*]] = call spir_func noundef <8 x float> @_Z16__spirv_ocl_ceilDv8_f(<8 x float> noundef [[TMP0]]) #[[ATTR6]] -// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META66:![0-9]+]]) -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META69:![0-9]+]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META69]] +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META61:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META64:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META64]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I4_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I2_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I5_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I3_I]] to ptr addrspace(4) -// CHECK-NEXT: store <8 x float> [[CALL1_I_I]], ptr [[VEC_ADDR_I_I_I_I2_I]], align 32, !tbaa [[TBAA14]], !noalias [[META69]] -// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec8(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I4_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I5_I]]) #[[ATTR5]], !noalias [[META69]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[DST_I_I_I_I3_I]], align 2, !tbaa [[TBAA14]], !noalias [[META69]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META69]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META69]] -// CHECK-NEXT: store <8 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META69]] +// CHECK-NEXT: store <8 x float> [[CALL1_I_I]], ptr [[VEC_ADDR_I_I_I_I2_I]], align 32, !tbaa [[TBAA13]], !noalias [[META64]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec8(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I4_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I5_I]]) #[[ATTR5]], !noalias [[META64]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[DST_I_I_I_I3_I]], align 2, !tbaa [[TBAA13]], !noalias [[META64]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META64]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META64]] +// CHECK-NEXT: store <8 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META64]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestCeil(vec a) { @@ -221,7 +221,7 @@ SYCL_EXTERNAL auto TestCeil(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestFMAN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEES5_S5_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.412") align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[B:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.411") align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.411") align 32 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.411") align 32 captures(none) [[B:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.411") align 32 captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I14_I:%.*]] = alloca <16 x float>, align 64 // CHECK-NEXT: [[DST_I_I_I_I15_I:%.*]] = alloca [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -234,46 +234,46 @@ SYCL_EXTERNAL auto TestCeil(vec a) { // CHECK-NEXT: [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <16 x i16>, ptr [[A]], align 32 // CHECK-NEXT: [[AGG_TMP1_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <16 x i16>, ptr [[B]], align 32 // CHECK-NEXT: [[AGG_TMP2_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <16 x i16>, ptr [[C]], align 32 -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META71:![0-9]+]]) -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META74:![0-9]+]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META74]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META65:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META68:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META68]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I_I]] to ptr addrspace(4) -// CHECK-NEXT: store <16 x i16> [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD]], ptr [[VEC_ADDR_I_I_I_I_I]], align 32, !tbaa [[TBAA14]], !noalias [[META74]] -// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec16(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5]], !noalias [[META74]] -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[DST_I_I_I_I_I]], align 4, !tbaa [[TBAA14]], !noalias [[META74]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META74]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META74]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I3_I]]), !noalias [[META77:![0-9]+]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr nonnull [[DST_I_I_I_I4_I]]), !noalias [[META77]] +// CHECK-NEXT: store <16 x i16> [[AGG_TMP_SROA_0_SROA_0_0_COPYLOAD]], ptr [[VEC_ADDR_I_I_I_I_I]], align 32, !tbaa [[TBAA13]], !noalias [[META68]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec16(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I_I]]) #[[ATTR5]], !noalias [[META68]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x float>, ptr [[DST_I_I_I_I_I]], align 4, !tbaa [[TBAA13]], !noalias [[META68]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I_I]]), !noalias [[META68]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr nonnull [[DST_I_I_I_I_I]]), !noalias [[META68]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I3_I]]), !noalias [[META71:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr nonnull [[DST_I_I_I_I4_I]]), !noalias [[META71]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I5_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I3_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I6_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I4_I]] to ptr addrspace(4) -// CHECK-NEXT: store <16 x i16> [[AGG_TMP1_SROA_0_SROA_0_0_COPYLOAD]], ptr [[VEC_ADDR_I_I_I_I3_I]], align 32, !tbaa [[TBAA14]], !noalias [[META77]] -// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec16(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I5_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I6_I]]) #[[ATTR5]], !noalias [[META77]] -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[DST_I_I_I_I4_I]], align 4, !tbaa [[TBAA14]], !noalias [[META77]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I3_I]]), !noalias [[META77]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr nonnull [[DST_I_I_I_I4_I]]), !noalias [[META77]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I7_I]]), !noalias [[META80:![0-9]+]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr nonnull [[DST_I_I_I_I8_I]]), !noalias [[META80]] +// CHECK-NEXT: store <16 x i16> [[AGG_TMP1_SROA_0_SROA_0_0_COPYLOAD]], ptr [[VEC_ADDR_I_I_I_I3_I]], align 32, !tbaa [[TBAA13]], !noalias [[META71]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec16(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I5_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I6_I]]) #[[ATTR5]], !noalias [[META71]] +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[DST_I_I_I_I4_I]], align 4, !tbaa [[TBAA13]], !noalias [[META71]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I3_I]]), !noalias [[META71]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr nonnull [[DST_I_I_I_I4_I]]), !noalias [[META71]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I7_I]]), !noalias [[META74:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr nonnull [[DST_I_I_I_I8_I]]), !noalias [[META74]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I9_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I7_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I10_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I8_I]] to ptr addrspace(4) -// CHECK-NEXT: store <16 x i16> [[AGG_TMP2_SROA_0_SROA_0_0_COPYLOAD]], ptr [[VEC_ADDR_I_I_I_I7_I]], align 32, !tbaa [[TBAA14]], !noalias [[META80]] -// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec16(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I9_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I10_I]]) #[[ATTR5]], !noalias [[META80]] -// CHECK-NEXT: [[TMP2:%.*]] = load <16 x float>, ptr [[DST_I_I_I_I8_I]], align 4, !tbaa [[TBAA14]], !noalias [[META80]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I7_I]]), !noalias [[META80]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr nonnull [[DST_I_I_I_I8_I]]), !noalias [[META80]] +// CHECK-NEXT: store <16 x i16> [[AGG_TMP2_SROA_0_SROA_0_0_COPYLOAD]], ptr [[VEC_ADDR_I_I_I_I7_I]], align 32, !tbaa [[TBAA13]], !noalias [[META74]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec16(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I9_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I10_I]]) #[[ATTR5]], !noalias [[META74]] +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x float>, ptr [[DST_I_I_I_I8_I]], align 4, !tbaa [[TBAA13]], !noalias [[META74]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I7_I]]), !noalias [[META74]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr nonnull [[DST_I_I_I_I8_I]]), !noalias [[META74]] // CHECK-NEXT: [[CALL3_I_I:%.*]] = call spir_func noundef <16 x float> @_Z15__spirv_ocl_fmaDv16_fS_S_(<16 x float> noundef [[TMP0]], <16 x float> noundef [[TMP1]], <16 x float> noundef [[TMP2]]) #[[ATTR6]] -// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META83:![0-9]+]]) -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr nonnull [[VEC_ADDR_I_I_I_I14_I]]), !noalias [[META86:![0-9]+]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[DST_I_I_I_I15_I]]), !noalias [[META86]] +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META77:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 64, ptr nonnull [[VEC_ADDR_I_I_I_I14_I]]), !noalias [[META80:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[DST_I_I_I_I15_I]]), !noalias [[META80]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I16_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I14_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I17_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I15_I]] to ptr addrspace(4) -// CHECK-NEXT: store <16 x float> [[CALL3_I_I]], ptr [[VEC_ADDR_I_I_I_I14_I]], align 64, !tbaa [[TBAA14]], !noalias [[META86]] -// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec16(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I16_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I17_I]]) #[[ATTR5]], !noalias [[META86]] -// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr [[DST_I_I_I_I15_I]], align 2, !tbaa [[TBAA14]], !noalias [[META86]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr nonnull [[VEC_ADDR_I_I_I_I14_I]]), !noalias [[META86]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[DST_I_I_I_I15_I]]), !noalias [[META86]] -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !alias.scope [[META86]] +// CHECK-NEXT: store <16 x float> [[CALL3_I_I]], ptr [[VEC_ADDR_I_I_I_I14_I]], align 64, !tbaa [[TBAA13]], !noalias [[META80]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec16(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I16_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I17_I]]) #[[ATTR5]], !noalias [[META80]] +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr [[DST_I_I_I_I15_I]], align 2, !tbaa [[TBAA13]], !noalias [[META80]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr nonnull [[VEC_ADDR_I_I_I_I14_I]]), !noalias [[META80]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[DST_I_I_I_I15_I]]), !noalias [[META80]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !alias.scope [[META80]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFMA(vec a, vec b, diff --git a/sycl/test/check_device_code/vector/convert_bfloat_preview.cpp b/sycl/test/check_device_code/vector/convert_bfloat_preview.cpp index ff4f016449a61..6e96ea2f3d948 100644 --- a/sycl/test/check_device_code/vector/convert_bfloat_preview.cpp +++ b/sycl/test/check_device_code/vector/convert_bfloat_preview.cpp @@ -17,20 +17,20 @@ using bfloat16 = sycl::ext::oneapi::bfloat16; // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x float], align 4 -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) -// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META8]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META8]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META8]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META7]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META7]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META7]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I]], <4 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I]], align 8, !tbaa [[TBAA11:![0-9]+]], !noalias [[META8]] -// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I]]) #[[ATTR4:[0-9]+]], !noalias [[META8]] -// CHECK-NEXT: [[LOADVECN_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I]], align 4, !noalias [[META8]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META8]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META8]] +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I]], align 8, !tbaa [[TBAA10:![0-9]+]], !noalias [[META7]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I]]) #[[ATTR4:[0-9]+]], !noalias [[META7]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I]], align 4, !noalias [[META7]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META7]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META7]] // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I]], <4 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META8]] +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META7]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBFtoFDeviceRNE(vec &inp) { @@ -42,20 +42,20 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRNE(vec &inp) { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x float], align 4 -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META15]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META15]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META15]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META13]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META13]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META13]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I]], <4 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I]], align 8, !tbaa [[TBAA11]], !noalias [[META15]] -// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I]]) #[[ATTR4]], !noalias [[META15]] -// CHECK-NEXT: [[LOADVECN_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I]], align 4, !noalias [[META15]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META15]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META15]] +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I]], align 8, !tbaa [[TBAA10]], !noalias [[META13]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I]]) #[[ATTR4]], !noalias [[META13]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I:%.*]] = load <4 x float>, ptr [[DST_I_I_I_I]], align 4, !noalias [[META13]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META13]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META13]] // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I]], <4 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META15]] +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META13]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec &inp) { @@ -65,8 +65,8 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec &inp) { // CHECK-LABEL: define dso_local spir_func void @_Z19TestBFtointDeviceRZRN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.70") align 16 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META19]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META16]] // CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I]], <4 x i16> poison, <3 x i32> // CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK: for.cond.i.i.i: @@ -76,13 +76,13 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec &inp) { // CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZNK4SYCL3_V13VECINS0_3EXT6ONEAPI8BFLOAT16ELI3EE7CONVERTIILNS_13ROUNDING_MODEE2EEENS1_IT_LI3EEEV_EXIT:%.*]] // CHECK: for.body.i.i.i: // CHECK-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <3 x i16> [[EXTRACTVEC_I_I]], i32 [[I_0_I_I_I]] -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef i32 @__imf_bfloat162int_rz(i16 noundef zeroext [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META19]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef i32 @__imf_bfloat162int_rz(i16 noundef zeroext [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META16]] // CHECK-NEXT: [[VECINS_I_I_I]] = insertelement <3 x i32> [[RETVAL1_SROA_0_0_I_I_I]], i32 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]] // CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK: _ZNK4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EE7convertIiLNS_13rounding_modeE2EEENS1_IT_Li3EEEv.exit: // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i32> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i32> poison, <4 x i32> -// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META19]] +// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META16]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec &inp) { @@ -90,12 +90,23 @@ SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z20TestBFtointDeviceRNERN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi1EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.108") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 2 captures(none) dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.108") align 4 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 2 captures(none) dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(4) [[INP]], align 2, !tbaa [[TBAA11]], !noalias [[META25]] -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef i32 @__imf_bfloat162int_rn(i16 noundef zeroext [[TMP0]]) #[[ATTR4]], !noalias [[META25]] -// CHECK-NEXT: store i32 [[CALL_I_I_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META25]] +// CHECK-NEXT: [[RESULT_I:%.*]] = alloca %"class.sycl::_V1::vec.108", align 4 +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[RESULT_I]]) +// CHECK-NEXT: br label [[ARRAYINIT_BODY_I_I_I:%.*]] +// CHECK: arrayinit.body.i.i.i: +// CHECK-NEXT: [[ARRAYINIT_CUR_I_I_I:%.*]] = phi ptr [ [[RESULT_I]], [[ENTRY:%.*]] ], [ [[ARRAYINIT_NEXT_I_I_I:%.*]], [[ARRAYINIT_BODY_I_I_I]] ] +// CHECK-NEXT: store i32 0, ptr [[ARRAYINIT_CUR_I_I_I]], align 4, !tbaa [[TBAA24:![0-9]+]], !noalias [[META21]] +// CHECK-NEXT: [[ARRAYINIT_NEXT_I_I_I]] = getelementptr inbounds i8, ptr [[ARRAYINIT_CUR_I_I_I]], i64 4 +// CHECK-NEXT: [[ARRAYINIT_DONE_I_I_I:%.*]] = icmp eq ptr [[ARRAYINIT_CUR_I_I_I]], [[RESULT_I]] +// CHECK-NEXT: br i1 [[ARRAYINIT_DONE_I_I_I]], label [[_ZNK4SYCL3_V13VECINS0_3EXT6ONEAPI8BFLOAT16ELI1EE7CONVERTIILNS_13ROUNDING_MODEE0EEENS1_IT_LI1EEEV_EXIT:%.*]], label [[ARRAYINIT_BODY_I_I_I]] +// CHECK: _ZNK4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi1EE7convertIiLNS_13rounding_modeE0EEENS1_IT_Li1EEEv.exit: +// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(4) [[INP]], align 2, !tbaa [[TBAA10]], !noalias [[META21]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef i32 @__imf_bfloat162int_rn(i16 noundef zeroext [[TMP0]]) #[[ATTR4]], !noalias [[META21]] +// CHECK-NEXT: store i32 [[CALL_I_I_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META21]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[RESULT_I]]) // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBFtointDeviceRNE(vec &inp) { @@ -107,20 +118,20 @@ SYCL_EXTERNAL auto TestBFtointDeviceRNE(vec &inp) { // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x float>, align 16 // CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) -// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META29]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META29]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META29]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META26]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META26]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META26]] // CHECK-NEXT: [[VEC_ADDR_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[VEC_ADDR_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: [[DST_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[DST_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: [[EXTRACTVEC_I_I_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I]], <4 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I]], align 16, !tbaa [[TBAA11]], !noalias [[META29]] -// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I]]) #[[ATTR4]], !noalias [[META29]] -// CHECK-NEXT: [[LOADVECN_I_I_I_I_I:%.*]] = load <4 x i16>, ptr [[DST_I_I_I_I]], align 2, !noalias [[META29]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META29]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META29]] +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I_I_I]], ptr [[VEC_ADDR_I_I_I_I]], align 16, !tbaa [[TBAA10]], !noalias [[META26]] +// CHECK-NEXT: call spir_func void @__devicelib_ConvertFToBF16INTELVec3(ptr addrspace(4) noundef [[VEC_ADDR_ASCAST_I_I_I_I]], ptr addrspace(4) noundef [[DST_ASCAST_I_I_I_I]]) #[[ATTR4]], !noalias [[META26]] +// CHECK-NEXT: [[LOADVECN_I_I_I_I_I:%.*]] = load <4 x i16>, ptr [[DST_I_I_I_I]], align 2, !noalias [[META26]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META26]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META26]] // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I_I_I_I]], <4 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META29]] +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META26]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFtoBFDeviceRNE(vec &inp) { @@ -130,8 +141,8 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRNE(vec &inp) { // CHECK-LABEL: define dso_local spir_func void @_Z17TestFtoBFDeviceRZRN4sycl3_V13vecIfLi3EEE( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.32") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 16 captures(none) dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) -// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META33]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META29]] // CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I]], <4 x float> poison, <3 x i32> // CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK: for.cond.i.i.i: @@ -141,13 +152,13 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRNE(vec &inp) { // CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZNK4SYCL3_V13VECIFLI3EE7CONVERTINS0_3EXT6ONEAPI8BFLOAT16ELNS_13ROUNDING_MODEE2EEENS1_IT_LI3EEEV_EXIT:%.*]] // CHECK: for.body.i.i.i: // CHECK-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <3 x float> [[EXTRACTVEC_I_I]], i32 [[I_0_I_I_I]] -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_float2bfloat16_rz(float noundef [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META33]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_float2bfloat16_rz(float noundef [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META29]] // CHECK-NEXT: [[VECINS_I_I_I]] = insertelement <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], i16 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]] // CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK: _ZNK4sycl3_V13vecIfLi3EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE2EEENS1_IT_Li3EEEv.exit: // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META33]] +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META29]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFtoBFDeviceRZ(vec &inp) { @@ -157,8 +168,8 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRZ(vec &inp) { // CHECK-LABEL: define dso_local spir_func void @_Z19TestInttoBFDeviceRZRN4sycl3_V13vecIiLi3EEE( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.32") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 16 captures(none) dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) -// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i32>, ptr addrspace(4) [[INP]], align 16, !noalias [[META38]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) +// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i32>, ptr addrspace(4) [[INP]], align 16, !noalias [[META33]] // CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x i32> [[LOADVECN_I_I]], <4 x i32> poison, <3 x i32> // CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK: for.cond.i.i.i: @@ -168,13 +179,13 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRZ(vec &inp) { // CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZNK4SYCL3_V13VECIILI3EE7CONVERTINS0_3EXT6ONEAPI8BFLOAT16ELNS_13ROUNDING_MODEE2EEENS1_IT_LI3EEEV_EXIT:%.*]] // CHECK: for.body.i.i.i: // CHECK-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <3 x i32> [[EXTRACTVEC_I_I]], i32 [[I_0_I_I_I]] -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_int2bfloat16_rz(i32 noundef [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META38]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_int2bfloat16_rz(i32 noundef [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META33]] // CHECK-NEXT: [[VECINS_I_I_I]] = insertelement <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], i16 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]] // CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK: _ZNK4sycl3_V13vecIiLi3EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE2EEENS1_IT_Li3EEEv.exit: // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META38]] +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META33]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec &inp) { @@ -182,12 +193,23 @@ SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestLLtoBFDeviceRTPRN4sycl3_V13vecIxLi1EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.146") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.145") align 2 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[INP]], align 8, !tbaa [[TBAA46:![0-9]+]], !noalias [[META43]] -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_ll2bfloat16_ru(i64 noundef [[TMP0]]) #[[ATTR4]], !noalias [[META43]] -// CHECK-NEXT: store i16 [[CALL_I_I_I_I]], ptr [[AGG_RESULT]], align 2, !alias.scope [[META43]] +// CHECK-NEXT: [[RESULT_I:%.*]] = alloca %"class.sycl::_V1::vec.145", align 2 +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 2, ptr nonnull [[RESULT_I]]) +// CHECK-NEXT: br label [[ARRAYINIT_BODY_I_I_I:%.*]] +// CHECK: arrayinit.body.i.i.i: +// CHECK-NEXT: [[ARRAYINIT_CUR_I_I_I:%.*]] = phi ptr [ [[RESULT_I]], [[ENTRY:%.*]] ], [ [[ARRAYINIT_NEXT_I_I_I:%.*]], [[ARRAYINIT_BODY_I_I_I]] ] +// CHECK-NEXT: store i16 0, ptr [[ARRAYINIT_CUR_I_I_I]], align 2, !noalias [[META37]] +// CHECK-NEXT: [[ARRAYINIT_NEXT_I_I_I]] = getelementptr inbounds i8, ptr [[ARRAYINIT_CUR_I_I_I]], i64 2 +// CHECK-NEXT: [[ARRAYINIT_DONE_I_I_I:%.*]] = icmp eq ptr [[ARRAYINIT_CUR_I_I_I]], [[RESULT_I]] +// CHECK-NEXT: br i1 [[ARRAYINIT_DONE_I_I_I]], label [[_ZNK4SYCL3_V13VECIXLI1EE7CONVERTINS0_3EXT6ONEAPI8BFLOAT16ELNS_13ROUNDING_MODEE3EEENS1_IT_LI1EEEV_EXIT:%.*]], label [[ARRAYINIT_BODY_I_I_I]] +// CHECK: _ZNK4sycl3_V13vecIxLi1EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE3EEENS1_IT_Li1EEEv.exit: +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[INP]], align 8, !tbaa [[TBAA40:![0-9]+]], !noalias [[META37]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__imf_ll2bfloat16_ru(i64 noundef [[TMP0]]) #[[ATTR4]], !noalias [[META37]] +// CHECK-NEXT: store i16 [[CALL_I_I_I_I]], ptr [[AGG_RESULT]], align 2, !alias.scope [[META37]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 2, ptr nonnull [[RESULT_I]]) // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec &inp) { @@ -195,10 +217,10 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z22TestShorttoBFDeviceRTNRN4sycl3_V13vecIsLi2EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.224") align 4 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 4 captures(none) dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.226") align 4 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 4 captures(none) dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(4) [[INP]], align 4, !tbaa [[TBAA11]], !noalias [[META49]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META42:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(4) [[INP]], align 4, !tbaa [[TBAA10]], !noalias [[META42]] // CHECK-NEXT: br label [[FOR_COND_I_I_I:%.*]] // CHECK: for.cond.i.i.i: // CHECK-NEXT: [[RETVAL1_0_I_I_I:%.*]] = phi <2 x i16> [ undef, [[ENTRY:%.*]] ], [ [[VECINS_I_I_I:%.*]], [[FOR_BODY_I_I_I:%.*]] ] @@ -207,12 +229,12 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec &inp) { // CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_BODY_I_I_I]], label [[_ZNK4SYCL3_V13VECISLI2EE7CONVERTINS0_3EXT6ONEAPI8BFLOAT16ELNS_13ROUNDING_MODEE4EEENS1_IT_LI2EEEV_EXIT:%.*]] // CHECK: for.body.i.i.i: // CHECK-NEXT: [[VECEXT_I_I_I:%.*]] = extractelement <2 x i16> [[TMP0]], i32 [[I_0_I_I_I]] -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_short2bfloat16_rd(i16 noundef signext [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META49]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_short2bfloat16_rd(i16 noundef signext [[VECEXT_I_I_I]]) #[[ATTR4]], !noalias [[META42]] // CHECK-NEXT: [[VECINS_I_I_I]] = insertelement <2 x i16> [[RETVAL1_0_I_I_I]], i16 [[CALL_I_I_I_I]], i32 [[I_0_I_I_I]] // CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP52:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK: _ZNK4sycl3_V13vecIsLi2EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE4EEENS1_IT_Li2EEEv.exit: -// CHECK-NEXT: store <2 x i16> [[RETVAL1_0_I_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META49]] +// CHECK-NEXT: store <2 x i16> [[RETVAL1_0_I_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META42]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestShorttoBFDeviceRTN(vec &inp) { diff --git a/sycl/test/check_device_code/vector/math_ops_preview.cpp b/sycl/test/check_device_code/vector/math_ops_preview.cpp index b78018f3bfaab..3919ca83fe35d 100644 --- a/sycl/test/check_device_code/vector/math_ops_preview.cpp +++ b/sycl/test/check_device_code/vector/math_ops_preview.cpp @@ -21,12 +21,12 @@ using namespace sycl; // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIiLi2EEES2_( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A]], align 8, !tbaa [[TBAA14:![0-9]+]], !noalias [[META17:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[B]], align 8, !tbaa [[TBAA14]], !noalias [[META17]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A]], align 8, !tbaa [[TBAA13:![0-9]+]], !noalias [[META16:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[B]], align 8, !tbaa [[TBAA13]], !noalias [[META16]] // CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <2 x i32> [[ADD_I_I_I_I_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META18:![0-9]+]] +// CHECK-NEXT: store <2 x i32> [[ADD_I_I_I_I_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META17:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } @@ -34,40 +34,40 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIfLi3EEES2_( // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.33") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.33") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.33") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) -// CHECK-NEXT: [[LOADVECN_I_I_I:%.*]] = load <4 x float>, ptr [[A]], align 16, !noalias [[META28:![0-9]+]] -// CHECK-NEXT: [[LOADVECN_I6_I_I:%.*]] = load <4 x float>, ptr [[B]], align 16, !noalias [[META28]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK-NEXT: [[LOADVECN_I_I_I:%.*]] = load <4 x float>, ptr [[A]], align 16, !noalias [[META26:![0-9]+]] +// CHECK-NEXT: [[LOADVECN_I6_I_I:%.*]] = load <4 x float>, ptr [[B]], align 16, !noalias [[META26]] // CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x float> [[LOADVECN_I_I_I]], [[LOADVECN_I6_I_I]] // CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META28]] +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META26]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIcLi16EEES2_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.73") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.73") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.73") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.72") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.72") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.72") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META36:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[B]], align 16, !tbaa [[TBAA14]], !noalias [[META36]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA13]], !noalias [[META33:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[B]], align 16, !tbaa [[TBAA13]], !noalias [[META33]] // CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = add <16 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <16 x i8> [[ADD_I_I_I_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META36]] +// CHECK-NEXT: store <16 x i8> [[ADD_I_I_I_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META33]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // std::byte does not support '+'. Therefore, using bitwise XOR as a substitute. // CHECK-LABEL: define dso_local spir_func void @_Z7TestXorN4sycl3_V13vecISt4byteLi8EEES3_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.113") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.113") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.113") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.111") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.111") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.111") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 8, !tbaa [[TBAA14]], !noalias [[META44:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[B]], align 8, !tbaa [[TBAA14]], !noalias [[META44]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META34:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 8, !tbaa [[TBAA13]], !noalias [[META40:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[B]], align 8, !tbaa [[TBAA13]], !noalias [[META40]] // CHECK-NEXT: [[XOR_I_I_I_I_I:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <8 x i8> [[XOR_I_I_I_I_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META45:![0-9]+]] +// CHECK-NEXT: store <8 x i8> [[XOR_I_I_I_I_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META41:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestXor(vec a, vec b) { @@ -75,69 +75,69 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIbLi4EEES2_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.123") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.150") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.150") align 4 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.150") align 4 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA14]], !noalias [[META55:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[B]], align 4, !tbaa [[TBAA14]], !noalias [[META55]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META44:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META47:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA13]], !noalias [[META50:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[B]], align 4, !tbaa [[TBAA13]], !noalias [[META50]] // CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = sub <4 x i8> zeroinitializer, [[TMP1]] // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ne <4 x i8> [[TMP0]], [[ADD_I_I_I_I_I]] // CHECK-NEXT: [[SEXT_NEG_I_I:%.*]] = zext <4 x i1> [[CMP_I_I]] to <4 x i8> -// CHECK-NEXT: store <4 x i8> [[SEXT_NEG_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META56:![0-9]+]] +// CHECK-NEXT: store <4 x i8> [[SEXT_NEG_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META51:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_6detail9half_impl4halfELi3EEES5_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.163") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.163") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.163") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.189") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.189") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.189") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META65:![0-9]+]]) -// CHECK-NEXT: [[LOADVECN_I_I_I:%.*]] = load <4 x half>, ptr [[A]], align 8, !noalias [[META68:![0-9]+]] -// CHECK-NEXT: [[LOADVECN_I6_I_I:%.*]] = load <4 x half>, ptr [[B]], align 8, !noalias [[META68]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META56:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META59:![0-9]+]]) +// CHECK-NEXT: [[LOADVECN_I_I_I:%.*]] = load <4 x half>, ptr [[A]], align 8, !noalias [[META62:![0-9]+]] +// CHECK-NEXT: [[LOADVECN_I6_I_I:%.*]] = load <4 x half>, ptr [[B]], align 8, !noalias [[META62]] // CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x half> [[LOADVECN_I_I_I]], [[LOADVECN_I6_I_I]] // CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x half> [[TMP0]], <4 x half> poison, <4 x i32> -// CHECK-NEXT: store <4 x half> [[EXTRACTVEC_I_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META69:![0-9]+]] +// CHECK-NEXT: store <4 x half> [[EXTRACTVEC_I_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META63:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.203") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.228") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.228") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.228") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.203", align 8 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.228", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(4) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META73:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META76:![0-9]+]]) -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META73]] -// CHECK-NEXT: store i64 0, ptr [[RES_I_I]], align 8, !noalias [[META79:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META66:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META69:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META66]] +// CHECK-NEXT: store i64 0, ptr [[RES_I_I]], align 8, !noalias [[META72:![0-9]+]] // CHECK-NEXT: [[REF_TMP_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[REF_TMP_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK: for.cond.i.i: // CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 3 -// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILPLINS0_3EXT6ONEAPI8BFLOAT16EEENST9ENABLE_IFIX24IS_OP_AVAILABLE_FOR_TYPEIST4PLUSIVET_EENS0_3VECIS5_LI3EEEE4TYPEERKSB_SF__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILPLERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI3EEES8__EXIT:%.*]] // CHECK: for.body.i.i: // CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] // CHECK-NEXT: [[ARRAYIDX_I12_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META80:![0-9]+]] -// CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I]]) #[[ATTR8:[0-9]+]], !noalias [[META83:![0-9]+]] -// CHECK-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I12_I_I]]) #[[ATTR8]], !noalias [[META83]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META73:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I]]) #[[ATTR7:[0-9]+]], !noalias [[META76:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I12_I_I]]) #[[ATTR7]], !noalias [[META76]] // CHECK-NEXT: [[ADD_I_I_I_I:%.*]] = fadd float [[CALL_I_I_I_I_I_I]], [[CALL_I_I2_I_I_I_I]] -// CHECK-NEXT: store float [[ADD_I_I_I_I]], ptr [[REF_TMP_I_I_I_I]], align 4, !tbaa [[TBAA86:![0-9]+]], !noalias [[META83]] -// CHECK-NEXT: [[CALL_I_I3_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I_I_I]]) #[[ATTR8]], !noalias [[META83]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META80]] +// CHECK-NEXT: store float [[ADD_I_I_I_I]], ptr [[REF_TMP_I_I_I_I]], align 4, !tbaa [[TBAA79:![0-9]+]], !noalias [[META76]] +// CHECK-NEXT: [[CALL_I_I3_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I_I_I]]) #[[ATTR7]], !noalias [[META76]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META73]] // CHECK-NEXT: [[ARRAYIDX_I14_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: store i16 [[CALL_I_I3_I_I_I_I]], ptr [[ARRAYIDX_I14_I_I]], align 2, !tbaa [[TBAA88:![0-9]+]], !noalias [[META79]] +// CHECK-NEXT: store i16 [[CALL_I_I3_I_I_I_I]], ptr [[ARRAYIDX_I14_I_I]], align 2, !tbaa [[TBAA81:![0-9]+]], !noalias [[META72]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP90:![0-9]+]] -// CHECK: _ZN4sycl3_V16detailplINS0_3ext6oneapi8bfloat16EEENSt9enable_ifIX24is_op_available_for_typeISt4plusIvET_EENS0_3vecIS5_Li3EEEE4typeERKSB_SF_.exit: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META79]] -// CHECK-NEXT: store i64 [[TMP0]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META79]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META73]] +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP83:![0-9]+]] +// CHECK: _ZN4sycl3_V16detailplERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi3EEES8_.exit: +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META72]] +// CHECK-NEXT: store i64 [[TMP0]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META72]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META66]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, @@ -148,42 +148,46 @@ SYCL_EXTERNAL auto TestAdd(vec a, /***************** Binary Logical Ops *******************/ // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIiLi16EEES2_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.241") align 64 captures(none) initializes((0, 64)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.241") align 64 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.241") align 64 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.265") align 64 captures(none) initializes((0, 64)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.265") align 64 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.265") align 64 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META93:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META96:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A]], align 64, !tbaa [[TBAA14]], !noalias [[META99:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[B]], align 64, !tbaa [[TBAA14]], !noalias [[META99]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META85:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META88:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A]], align 64, !tbaa [[TBAA13]], !noalias [[META91:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[B]], align 64, !tbaa [[TBAA13]], !noalias [[META91]] // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp sgt <16 x i32> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <16 x i1> [[CMP_I_I_I_I]] to <16 x i32> -// CHECK-NEXT: store <16 x i32> [[SEXT_I_I_I_I]], ptr [[AGG_RESULT]], align 64, !alias.scope [[META99]] +// CHECK-NEXT: store <16 x i32> [[SEXT_I_I_I_I]], ptr [[AGG_RESULT]], align 64, !alias.scope [[META91]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { return a > b; } -// CHECK-LABEL: define dso_local spir_func noundef range(i8 -1, 1) <3 x i8> @_Z15TestGreaterThanN4sycl3_V13vecISt4byteLi3EEES3_( -// CHECK-SAME: ptr noundef readonly byval(%"class.sycl::_V1::vec.282") align 4 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.282") align 4 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {{.*}}{ +// CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecISt4byteLi3EEES3_( +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.305") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.342") align 4 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.342") align 4 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i8>, ptr [[A]], align 1 -// CHECK-NEXT: [[LOADVECN_I_I2:%.*]] = load <4 x i8>, ptr [[B]], align 1 -// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i8> [[LOADVECN_I_I]], [[LOADVECN_I_I2]] -// CHECK-NEXT: [[CMP:%.*]] = shufflevector <4 x i1> [[TMP0]], <4 x i1> poison, <3 x i32> -// CHECK-NEXT: [[SEXT:%.*]] = sext <3 x i1> [[CMP]] to <3 x i8> -// CHECK-NEXT: ret <3 x i8> [[SEXT]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META92:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META95:![0-9]+]]) +// CHECK-NEXT: [[LOADVECN_I_I_I:%.*]] = load <4 x i8>, ptr [[A]], align 4, !noalias [[META98:![0-9]+]] +// CHECK-NEXT: [[LOADVECN_I5_I_I:%.*]] = load <4 x i8>, ptr [[B]], align 4, !noalias [[META98]] +// CHECK-NEXT: [[TMP0:%.*]] = icmp ugt <4 x i8> [[LOADVECN_I_I_I]], [[LOADVECN_I5_I_I]] +// CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = shufflevector <4 x i1> [[TMP0]], <4 x i1> poison, <3 x i32> +// CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <3 x i1> [[CMP_I_I_I_I]] to <3 x i8> +// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <3 x i8> [[SEXT_I_I_I_I]], <3 x i8> poison, <4 x i32> +// CHECK-NEXT: store <4 x i8> [[EXTRACTVEC_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META99:![0-9]+]] +// CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { return a > b; } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIbLi2EEES2_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.290") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.328") align 2 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.328") align 2 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.383") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.420") align 2 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.420") align 2 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META105:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr [[A]], align 2, !tbaa [[TBAA14]], !noalias [[META108:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[B]], align 2, !tbaa [[TBAA14]], !noalias [[META108]] +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr [[A]], align 2, !tbaa [[TBAA13]], !noalias [[META108:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[B]], align 2, !tbaa [[TBAA13]], !noalias [[META108]] // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp sgt <2 x i8> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <2 x i1> [[CMP_I_I_I_I]] to <2 x i8> // CHECK-NEXT: store <2 x i8> [[SEXT_I_I_I_I]], ptr [[AGG_RESULT]], align 2, !alias.scope [[META109:![0-9]+]] @@ -194,15 +198,15 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEES5_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.370") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.461") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.498") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.498") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META113:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META116:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META119:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B]], align 16, !tbaa [[TBAA14]], !noalias [[META119]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META112:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META115:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA13]], !noalias [[META118:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B]], align 16, !tbaa [[TBAA13]], !noalias [[META118]] // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = fcmp ogt <8 x half> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <8 x i1> [[CMP_I_I_I_I]] to <8 x i16> -// CHECK-NEXT: store <8 x i16> [[SEXT_I_I_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META119]] +// CHECK-NEXT: store <8 x i16> [[SEXT_I_I_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META118]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { @@ -210,35 +214,35 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEES5_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.450") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.488") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.488") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.539") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.576") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.576") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.450", align 8 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.539", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(4) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META121:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META124:![0-9]+]]) -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META121]] -// CHECK-NEXT: store i64 0, ptr [[RES_I_I]], align 8, !noalias [[META127:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META119:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META122:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META119]] +// CHECK-NEXT: store i64 0, ptr [[RES_I_I]], align 8, !noalias [[META125:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK: for.cond.i.i: // CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILGTINS0_3EXT6ONEAPI8BFLOAT16EEENST9ENABLE_IFIX24IS_OP_AVAILABLE_FOR_TYPEIST7GREATERIVET_EENS0_3VECISLI4EEEE4TYPEERKNSA_IS5_LI4EEESG__EXIT:%.*]] +// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILGTERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI4EEES8__EXIT:%.*]] // CHECK: for.body.i.i: // CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] // CHECK-NEXT: [[ARRAYIDX_I14_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: [[CALL_I_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I]]) #[[ATTR8]], !noalias [[META127]] -// CHECK-NEXT: [[CALL_I_I2_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I14_I_I]]) #[[ATTR8]], !noalias [[META127]] +// CHECK-NEXT: [[CALL_I_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I]]) #[[ATTR7]], !noalias [[META125]] +// CHECK-NEXT: [[CALL_I_I2_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I14_I_I]]) #[[ATTR7]], !noalias [[META125]] // CHECK-NEXT: [[CMP_I_I_I_I_I:%.*]] = fcmp ogt float [[CALL_I_I_I_I_I_I_I]], [[CALL_I_I2_I_I_I_I_I]] // CHECK-NEXT: [[CONV6_I_I:%.*]] = sext i1 [[CMP_I_I_I_I_I]] to i16 // CHECK-NEXT: [[ARRAYIDX_I16_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: store i16 [[CONV6_I_I]], ptr [[ARRAYIDX_I16_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META127]] +// CHECK-NEXT: store i16 [[CONV6_I_I]], ptr [[ARRAYIDX_I16_I_I]], align 2, !tbaa [[TBAA81]], !noalias [[META125]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP128:![0-9]+]] -// CHECK: _ZN4sycl3_V16detailgtINS0_3ext6oneapi8bfloat16EEENSt9enable_ifIX24is_op_available_for_typeISt7greaterIvET_EENS0_3vecIsLi4EEEE4typeERKNSA_IS5_Li4EEESG_.exit: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META127]] -// CHECK-NEXT: store i64 [[TMP0]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META127]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META121]] +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP126:![0-9]+]] +// CHECK: _ZN4sycl3_V16detailgtERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi4EEES8_.exit: +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META125]] +// CHECK-NEXT: store i64 [[TMP0]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META125]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META119]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, @@ -249,92 +253,92 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, /********************** Unary Ops **********************/ // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIiLi3EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.526") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.526") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.613") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.613") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META127:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META130:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META133:![0-9]+]]) -// CHECK-NEXT: [[LOADVECN_I_I_I:%.*]] = load <4 x i32>, ptr [[A]], align 16, !noalias [[META136:![0-9]+]] +// CHECK-NEXT: [[LOADVECN_I_I_I:%.*]] = load <4 x i32>, ptr [[A]], align 16, !noalias [[META133:![0-9]+]] // CHECK-NEXT: [[EXTRACTVEC_I_I_I:%.*]] = shufflevector <4 x i32> [[LOADVECN_I_I_I]], <4 x i32> poison, <3 x i32> // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp eq <3 x i32> [[EXTRACTVEC_I_I_I]], zeroinitializer // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <3 x i1> [[CMP_I_I_I_I]] to <3 x i32> // CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <3 x i32> [[SEXT_I_I_I_I]], <3 x i32> poison, <4 x i32> -// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META136]] +// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META133]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecIiLi4EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.565") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.565") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.651") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.651") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META138:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META141:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META144:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META134:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META137:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 16, !tbaa [[TBAA13]], !noalias [[META140:![0-9]+]] // CHECK-NEXT: [[SUB_I_I_I_I:%.*]] = sub <4 x i32> zeroinitializer, [[TMP0]] -// CHECK-NEXT: store <4 x i32> [[SUB_I_I_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META144]] +// CHECK-NEXT: store <4 x i32> [[SUB_I_I_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META140]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // Negation is not valid for std::byte. Therefore, using bitwise negation. // CHECK-LABEL: define dso_local spir_func void @_Z19TestBitwiseNegationN4sycl3_V13vecISt4byteLi16EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.604") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.604") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.689") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.689") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META146:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META149:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META152:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META141:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META144:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA13]], !noalias [[META147:![0-9]+]] // CHECK-NEXT: [[NOT_I_I_I_I:%.*]] = xor <16 x i8> [[TMP0]], splat (i8 -1) -// CHECK-NEXT: store <16 x i8> [[NOT_I_I_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META152]] +// CHECK-NEXT: store <16 x i8> [[NOT_I_I_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META147]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBitwiseNegation(vec a) { return ~a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIbLi4EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.613") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.727") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.150") align 4 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META154:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META157:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA14]], !noalias [[META160:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META148:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META151:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA13]], !noalias [[META154:![0-9]+]] // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp eq <4 x i8> [[TMP0]], zeroinitializer // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <4 x i1> [[CMP_I_I_I_I]] to <4 x i8> -// CHECK-NEXT: store <4 x i8> [[SEXT_I_I_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META161:![0-9]+]] +// CHECK-NEXT: store <4 x i8> [[SEXT_I_I_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META155:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_6detail9half_impl4halfELi2EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.652") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.690") align 4 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.765") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.802") align 4 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META165:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META168:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A]], align 4, !tbaa [[TBAA14]], !noalias [[META171:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META158:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META161:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A]], align 4, !tbaa [[TBAA13]], !noalias [[META164:![0-9]+]] // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = fcmp oeq <2 x half> [[TMP0]], zeroinitializer // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <2 x i1> [[CMP_I_I_I_I]] to <2 x i16> -// CHECK-NEXT: store <2 x i16> [[SEXT_I_I_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META172:![0-9]+]] +// CHECK-NEXT: store <2 x i16> [[SEXT_I_I_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META165:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.408") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.498") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.498") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META176:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META179:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META182:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META168:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META171:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA13]], !noalias [[META174:![0-9]+]] // CHECK-NEXT: [[FNEG_I_I_I_I:%.*]] = fneg <8 x half> [[TMP0]] -// CHECK-NEXT: store <8 x half> [[FNEG_I_I_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META182]] +// CHECK-NEXT: store <8 x half> [[FNEG_I_I_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META174]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.730") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.841") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.228") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ // CHECK-NEXT: entry: -// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.730", align 8 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.841", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META184:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META187:![0-9]+]]) -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META184]] -// CHECK-NEXT: store i64 0, ptr [[RES_I_I]], align 8, !noalias [[META190:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META175:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META178:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META175]] +// CHECK-NEXT: store i64 0, ptr [[RES_I_I]], align 8, !noalias [[META181:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK: for.cond.i.i: // CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] @@ -342,29 +346,29 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILNTERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI3EEE_EXIT:%.*]] // CHECK: for.body.i.i: // CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: [[CALL_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I]]) #[[ATTR8]], !noalias [[META190]] +// CHECK-NEXT: [[CALL_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I]]) #[[ATTR7]], !noalias [[META181]] // CHECK-NEXT: [[TOBOOL_I_I_I:%.*]] = fcmp oeq float [[CALL_I_I_I_I_I]], 0.000000e+00 // CHECK-NEXT: [[CONV2_I_I:%.*]] = sext i1 [[TOBOOL_I_I_I]] to i16 // CHECK-NEXT: [[ARRAYIDX_I9_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: store i16 [[CONV2_I_I]], ptr [[ARRAYIDX_I9_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META190]] +// CHECK-NEXT: store i16 [[CONV2_I_I]], ptr [[ARRAYIDX_I9_I_I]], align 2, !tbaa [[TBAA81]], !noalias [[META181]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP191:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP182:![0-9]+]] // CHECK: _ZN4sycl3_V16detailntERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi3EEE.exit: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META190]] -// CHECK-NEXT: store i64 [[TMP0]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META190]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META184]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META181]] +// CHECK-NEXT: store i64 [[TMP0]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META181]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META175]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.768") align 32 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.768") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.878") align 32 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.878") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 -// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.768", align 32 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.878", align 32 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[RES_I_I]]), !noalias [[META193:![0-9]+]] -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[RES_I_I]], i8 0, i64 32, i1 false), !noalias [[META196:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[RES_I_I]]), !noalias [[META183:![0-9]+]] +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[RES_I_I]], i8 0, i64 32, i1 false), !noalias [[META186:![0-9]+]] // CHECK-NEXT: [[REF_TMP_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[REF_TMP_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK: for.cond.i.i: @@ -373,19 +377,19 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILNGERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI16EEE_EXIT:%.*]] // CHECK: for.body.i.i: // CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META199:![0-9]+]] -// CHECK-NEXT: [[CALL_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I]]) #[[ATTR8]], !noalias [[META202:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META189:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I]]) #[[ATTR7]], !noalias [[META192:![0-9]+]] // CHECK-NEXT: [[FNEG_I_I_I_I:%.*]] = fneg float [[CALL_I_I_I_I_I]] -// CHECK-NEXT: store float [[FNEG_I_I_I_I]], ptr [[REF_TMP_I_I_I_I]], align 4, !tbaa [[TBAA86]], !noalias [[META202]] -// CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I_I_I]]) #[[ATTR8]], !noalias [[META202]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META199]] +// CHECK-NEXT: store float [[FNEG_I_I_I_I]], ptr [[REF_TMP_I_I_I_I]], align 4, !tbaa [[TBAA79]], !noalias [[META192]] +// CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I_I_I]]) #[[ATTR7]], !noalias [[META192]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META189]] // CHECK-NEXT: [[ARRAYIDX_I7_I_I:%.*]] = getelementptr inbounds [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: store i16 [[CALL_I_I_I_I_I_I]], ptr [[ARRAYIDX_I7_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META196]] +// CHECK-NEXT: store i16 [[CALL_I_I_I_I_I_I]], ptr [[ARRAYIDX_I7_I_I]], align 2, !tbaa [[TBAA81]], !noalias [[META186]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP205:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP195:![0-9]+]] // CHECK: _ZN4sycl3_V16detailngERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi16EEE.exit: // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 32 [[AGG_RESULT]], ptr align 32 [[RES_I_I]], i64 32, i1 false) -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[RES_I_I]]), !noalias [[META193]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[RES_I_I]]), !noalias [[META183]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestMinus(vec a) { return -a; }