diff --git a/clang/lib/Sema/SPIRVBuiltins.td b/clang/lib/Sema/SPIRVBuiltins.td index bba559f0f255a..4f7bd7d182d19 100644 --- a/clang/lib/Sema/SPIRVBuiltins.td +++ b/clang/lib/Sema/SPIRVBuiltins.td @@ -984,6 +984,42 @@ foreach name = ["GroupLogicalAndKHR", "GroupLogicalOrKHR"] in { def : SPVBuiltin; } +def SubgroupShuffleINTELVecType + : GenericType<"SubgroupShuffleINTELVecType", + TypeList<[Char, UChar, Short, UShort, Int, UInt, Float]>, + VecNoScalar>; + +foreach name = ["SubgroupShuffleINTEL", "SubgroupShuffleXorINTEL"] in { + def : SPVBuiltin; + def : SPVBuiltin; +} + +foreach name = ["SubgroupShuffleUpINTEL", "SubgroupShuffleDownINTEL"] in { + def : SPVBuiltin; + def : SPVBuiltin; +} + +foreach name = ["SubgroupBlockWriteINTEL"] in { + foreach AS = [GlobalAS, LocalAS] in { + foreach Ty = TLUnsignedInts.List in { + def : SPVBuiltin, Ty]>; + } + foreach Ty = [UChar, UShort] in { + foreach v = [2, 4, 8, 16] in { + def : SPVBuiltin, VectorType]>; + } + } + foreach Ty = [UInt, ULong] in { + foreach v = [2, 4, 8] in { + def : SPVBuiltin, VectorType]>; + } + } + } +} + // 3.56.24. 
Non-Uniform Instructions foreach name = ["GroupNonUniformElect"] in { diff --git a/clang/test/CodeGenSPIRV/spirv-builtin-lookup-intel-subgroup.cpp b/clang/test/CodeGenSPIRV/spirv-builtin-lookup-intel-subgroup.cpp new file mode 100644 index 0000000000000..19ef6c2530a21 --- /dev/null +++ b/clang/test/CodeGenSPIRV/spirv-builtin-lookup-intel-subgroup.cpp @@ -0,0 +1,252 @@ +// RUN: %clang_cc1 -triple=spirv64 -fdeclare-spirv-builtins -emit-llvm %s -o - | FileCheck %s + +template void test_shuffle() { + T v; + unsigned int id; + __spirv_SubgroupShuffleINTEL(v, id); + __spirv_SubgroupShuffleXorINTEL(v, id); + __spirv_SubgroupShuffleUpINTEL(v, v, id); + __spirv_SubgroupShuffleDownINTEL(v, v, id); +} + +template void test_shuffle_scalar_and_vector() { + test_shuffle(); + test_shuffle(); + test_shuffle(); + test_shuffle(); + test_shuffle(); + test_shuffle(); +} + +template void test_block_write_addrspace(PtrTy ptr) { + T v; + __spirv_SubgroupBlockWriteINTEL(ptr, v); + using T2 = T __attribute__((ext_vector_type(2))); + T2 v2; + __spirv_SubgroupBlockWriteINTEL(ptr, v2); + using T4 = T __attribute__((ext_vector_type(4))); + T4 v4; + __spirv_SubgroupBlockWriteINTEL(ptr, v4); + using T8 = T __attribute__((ext_vector_type(8))); + T8 v8; + __spirv_SubgroupBlockWriteINTEL(ptr, v8); +} + +template void test_block_write_addrspace_v16(PtrTy ptr) { + using T16 = T __attribute__((ext_vector_type(16))); + T16 v16; + __spirv_SubgroupBlockWriteINTEL(ptr, v16); +} + +template void test_block_write() { + __attribute__((opencl_global)) T * gptr; + test_block_write_addrspace(gptr); + __attribute__((opencl_local)) T * lptr; + test_block_write_addrspace(lptr); +} + +template void test_block_write_v16() { + __attribute__((opencl_global)) T * gptr; + test_block_write_addrspace_v16(gptr); + __attribute__((opencl_local)) T * lptr; + test_block_write_addrspace_v16(lptr); +} + +void test() { + test_shuffle<_Float16>(); + test_shuffle(); + + test_shuffle_scalar_and_vector(); + 
test_shuffle_scalar_and_vector(); + test_shuffle_scalar_and_vector(); + test_shuffle_scalar_and_vector(); + test_shuffle_scalar_and_vector(); + test_shuffle_scalar_and_vector(); + + test_block_write(); + test_block_write_v16(); + test_block_write(); + test_block_write_v16(); + test_block_write(); +} + +// CHECK: call spir_func noundef half @_Z28__spirv_SubgroupShuffleINTELDF16_j +// CHECK: call spir_func noundef half @_Z31__spirv_SubgroupShuffleXorINTELDF16_j +// CHECK: call spir_func noundef half @_Z30__spirv_SubgroupShuffleUpINTELDF16_DF16_j +// CHECK: call spir_func noundef half @_Z32__spirv_SubgroupShuffleDownINTELDF16_DF16_j +// CHECK: call spir_func noundef double @_Z28__spirv_SubgroupShuffleINTELdj +// CHECK: call spir_func noundef double @_Z31__spirv_SubgroupShuffleXorINTELdj +// CHECK: call spir_func noundef double @_Z30__spirv_SubgroupShuffleUpINTELddj +// CHECK: call spir_func noundef double @_Z32__spirv_SubgroupShuffleDownINTELddj +// CHECK: call spir_func noundef zeroext i8 @_Z28__spirv_SubgroupShuffleINTELhj +// CHECK: call spir_func noundef zeroext i8 @_Z31__spirv_SubgroupShuffleXorINTELhj +// CHECK: call spir_func noundef zeroext i8 @_Z30__spirv_SubgroupShuffleUpINTELhhj +// CHECK: call spir_func noundef zeroext i8 @_Z32__spirv_SubgroupShuffleDownINTELhhj +// CHECK: call spir_func noundef <2 x i8> @_Z28__spirv_SubgroupShuffleINTELDv2_hj +// CHECK: call spir_func noundef <2 x i8> @_Z31__spirv_SubgroupShuffleXorINTELDv2_hj +// CHECK: call spir_func noundef <2 x i8> @_Z30__spirv_SubgroupShuffleUpINTELDv2_hS_j +// CHECK: call spir_func noundef <2 x i8> @_Z32__spirv_SubgroupShuffleDownINTELDv2_hS_j +// CHECK: call spir_func noundef <3 x i8> @_Z28__spirv_SubgroupShuffleINTELDv3_hj +// CHECK: call spir_func noundef <3 x i8> @_Z31__spirv_SubgroupShuffleXorINTELDv3_hj +// CHECK: call spir_func noundef <3 x i8> @_Z30__spirv_SubgroupShuffleUpINTELDv3_hS_j +// CHECK: call spir_func noundef <3 x i8> @_Z32__spirv_SubgroupShuffleDownINTELDv3_hS_j +// CHECK: call 
spir_func noundef <4 x i8> @_Z28__spirv_SubgroupShuffleINTELDv4_hj +// CHECK: call spir_func noundef <4 x i8> @_Z31__spirv_SubgroupShuffleXorINTELDv4_hj +// CHECK: call spir_func noundef <4 x i8> @_Z30__spirv_SubgroupShuffleUpINTELDv4_hS_j +// CHECK: call spir_func noundef <4 x i8> @_Z32__spirv_SubgroupShuffleDownINTELDv4_hS_j +// CHECK: call spir_func noundef <8 x i8> @_Z28__spirv_SubgroupShuffleINTELDv8_hj +// CHECK: call spir_func noundef <8 x i8> @_Z31__spirv_SubgroupShuffleXorINTELDv8_hj +// CHECK: call spir_func noundef <8 x i8> @_Z30__spirv_SubgroupShuffleUpINTELDv8_hS_j +// CHECK: call spir_func noundef <8 x i8> @_Z32__spirv_SubgroupShuffleDownINTELDv8_hS_j +// CHECK: call spir_func noundef <16 x i8> @_Z28__spirv_SubgroupShuffleINTELDv16_hj +// CHECK: call spir_func noundef <16 x i8> @_Z31__spirv_SubgroupShuffleXorINTELDv16_hj +// CHECK: call spir_func noundef <16 x i8> @_Z30__spirv_SubgroupShuffleUpINTELDv16_hS_j +// CHECK: call spir_func noundef <16 x i8> @_Z32__spirv_SubgroupShuffleDownINTELDv16_hS_j +// CHECK: call spir_func noundef i32 @_Z28__spirv_SubgroupShuffleINTELij +// CHECK: call spir_func noundef i32 @_Z31__spirv_SubgroupShuffleXorINTELij +// CHECK: call spir_func noundef i32 @_Z30__spirv_SubgroupShuffleUpINTELiij +// CHECK: call spir_func noundef i32 @_Z32__spirv_SubgroupShuffleDownINTELiij +// CHECK: call spir_func noundef <2 x i32> @_Z28__spirv_SubgroupShuffleINTELDv2_ij +// CHECK: call spir_func noundef <2 x i32> @_Z31__spirv_SubgroupShuffleXorINTELDv2_ij +// CHECK: call spir_func noundef <2 x i32> @_Z30__spirv_SubgroupShuffleUpINTELDv2_iS_j +// CHECK: call spir_func noundef <2 x i32> @_Z32__spirv_SubgroupShuffleDownINTELDv2_iS_j +// CHECK: call spir_func noundef <3 x i32> @_Z28__spirv_SubgroupShuffleINTELDv3_ij +// CHECK: call spir_func noundef <3 x i32> @_Z31__spirv_SubgroupShuffleXorINTELDv3_ij +// CHECK: call spir_func noundef <3 x i32> @_Z30__spirv_SubgroupShuffleUpINTELDv3_iS_j +// CHECK: call spir_func noundef <3 x i32> 
@_Z32__spirv_SubgroupShuffleDownINTELDv3_iS_j +// CHECK: call spir_func noundef <4 x i32> @_Z28__spirv_SubgroupShuffleINTELDv4_ij +// CHECK: call spir_func noundef <4 x i32> @_Z31__spirv_SubgroupShuffleXorINTELDv4_ij +// CHECK: call spir_func noundef <4 x i32> @_Z30__spirv_SubgroupShuffleUpINTELDv4_iS_j +// CHECK: call spir_func noundef <4 x i32> @_Z32__spirv_SubgroupShuffleDownINTELDv4_iS_j +// CHECK: call spir_func noundef <8 x i32> @_Z28__spirv_SubgroupShuffleINTELDv8_ij +// CHECK: call spir_func noundef <8 x i32> @_Z31__spirv_SubgroupShuffleXorINTELDv8_ij +// CHECK: call spir_func noundef <8 x i32> @_Z30__spirv_SubgroupShuffleUpINTELDv8_iS_j +// CHECK: call spir_func noundef <8 x i32> @_Z32__spirv_SubgroupShuffleDownINTELDv8_iS_j +// CHECK: call spir_func noundef <16 x i32> @_Z28__spirv_SubgroupShuffleINTELDv16_ij +// CHECK: call spir_func noundef <16 x i32> @_Z31__spirv_SubgroupShuffleXorINTELDv16_ij +// CHECK: call spir_func noundef <16 x i32> @_Z30__spirv_SubgroupShuffleUpINTELDv16_iS_j +// CHECK: call spir_func noundef <16 x i32> @_Z32__spirv_SubgroupShuffleDownINTELDv16_iS_j +// CHECK: call spir_func noundef signext i16 @_Z28__spirv_SubgroupShuffleINTELsj +// CHECK: call spir_func noundef signext i16 @_Z31__spirv_SubgroupShuffleXorINTELsj +// CHECK: call spir_func noundef signext i16 @_Z30__spirv_SubgroupShuffleUpINTELssj +// CHECK: call spir_func noundef signext i16 @_Z32__spirv_SubgroupShuffleDownINTELssj +// CHECK: call spir_func noundef <2 x i16> @_Z28__spirv_SubgroupShuffleINTELDv2_sj +// CHECK: call spir_func noundef <2 x i16> @_Z31__spirv_SubgroupShuffleXorINTELDv2_sj +// CHECK: call spir_func noundef <2 x i16> @_Z30__spirv_SubgroupShuffleUpINTELDv2_sS_j +// CHECK: call spir_func noundef <2 x i16> @_Z32__spirv_SubgroupShuffleDownINTELDv2_sS_j +// CHECK: call spir_func noundef <3 x i16> @_Z28__spirv_SubgroupShuffleINTELDv3_sj +// CHECK: call spir_func noundef <3 x i16> @_Z31__spirv_SubgroupShuffleXorINTELDv3_sj +// CHECK: call spir_func noundef <3 x 
i16> @_Z30__spirv_SubgroupShuffleUpINTELDv3_sS_j +// CHECK: call spir_func noundef <3 x i16> @_Z32__spirv_SubgroupShuffleDownINTELDv3_sS_j +// CHECK: call spir_func noundef <4 x i16> @_Z28__spirv_SubgroupShuffleINTELDv4_sj +// CHECK: call spir_func noundef <4 x i16> @_Z31__spirv_SubgroupShuffleXorINTELDv4_sj +// CHECK: call spir_func noundef <4 x i16> @_Z30__spirv_SubgroupShuffleUpINTELDv4_sS_j +// CHECK: call spir_func noundef <4 x i16> @_Z32__spirv_SubgroupShuffleDownINTELDv4_sS_j +// CHECK: call spir_func noundef <8 x i16> @_Z28__spirv_SubgroupShuffleINTELDv8_sj +// CHECK: call spir_func noundef <8 x i16> @_Z31__spirv_SubgroupShuffleXorINTELDv8_sj +// CHECK: call spir_func noundef <8 x i16> @_Z30__spirv_SubgroupShuffleUpINTELDv8_sS_j +// CHECK: call spir_func noundef <8 x i16> @_Z32__spirv_SubgroupShuffleDownINTELDv8_sS_j +// CHECK: call spir_func noundef <16 x i16> @_Z28__spirv_SubgroupShuffleINTELDv16_sj +// CHECK: call spir_func noundef <16 x i16> @_Z31__spirv_SubgroupShuffleXorINTELDv16_sj +// CHECK: call spir_func noundef <16 x i16> @_Z30__spirv_SubgroupShuffleUpINTELDv16_sS_j +// CHECK: call spir_func noundef <16 x i16> @_Z32__spirv_SubgroupShuffleDownINTELDv16_sS_j +// CHECK: call spir_func noundef zeroext i16 @_Z28__spirv_SubgroupShuffleINTELtj +// CHECK: call spir_func noundef zeroext i16 @_Z31__spirv_SubgroupShuffleXorINTELtj +// CHECK: call spir_func noundef zeroext i16 @_Z30__spirv_SubgroupShuffleUpINTELttj +// CHECK: call spir_func noundef zeroext i16 @_Z32__spirv_SubgroupShuffleDownINTELttj +// CHECK: call spir_func noundef <2 x i16> @_Z28__spirv_SubgroupShuffleINTELDv2_tj +// CHECK: call spir_func noundef <2 x i16> @_Z31__spirv_SubgroupShuffleXorINTELDv2_tj +// CHECK: call spir_func noundef <2 x i16> @_Z30__spirv_SubgroupShuffleUpINTELDv2_tS_j +// CHECK: call spir_func noundef <2 x i16> @_Z32__spirv_SubgroupShuffleDownINTELDv2_tS_j +// CHECK: call spir_func noundef <3 x i16> @_Z28__spirv_SubgroupShuffleINTELDv3_tj +// CHECK: call spir_func noundef 
<3 x i16> @_Z31__spirv_SubgroupShuffleXorINTELDv3_tj +// CHECK: call spir_func noundef <3 x i16> @_Z30__spirv_SubgroupShuffleUpINTELDv3_tS_j +// CHECK: call spir_func noundef <3 x i16> @_Z32__spirv_SubgroupShuffleDownINTELDv3_tS_j +// CHECK: call spir_func noundef <4 x i16> @_Z28__spirv_SubgroupShuffleINTELDv4_tj +// CHECK: call spir_func noundef <4 x i16> @_Z31__spirv_SubgroupShuffleXorINTELDv4_tj +// CHECK: call spir_func noundef <4 x i16> @_Z30__spirv_SubgroupShuffleUpINTELDv4_tS_j +// CHECK: call spir_func noundef <4 x i16> @_Z32__spirv_SubgroupShuffleDownINTELDv4_tS_j +// CHECK: call spir_func noundef <8 x i16> @_Z28__spirv_SubgroupShuffleINTELDv8_tj +// CHECK: call spir_func noundef <8 x i16> @_Z31__spirv_SubgroupShuffleXorINTELDv8_tj +// CHECK: call spir_func noundef <8 x i16> @_Z30__spirv_SubgroupShuffleUpINTELDv8_tS_j +// CHECK: call spir_func noundef <8 x i16> @_Z32__spirv_SubgroupShuffleDownINTELDv8_tS_j +// CHECK: call spir_func noundef <16 x i16> @_Z28__spirv_SubgroupShuffleINTELDv16_tj +// CHECK: call spir_func noundef <16 x i16> @_Z31__spirv_SubgroupShuffleXorINTELDv16_tj +// CHECK: call spir_func noundef <16 x i16> @_Z30__spirv_SubgroupShuffleUpINTELDv16_tS_j +// CHECK: call spir_func noundef <16 x i16> @_Z32__spirv_SubgroupShuffleDownINTELDv16_tS_j +// CHECK: call spir_func noundef i32 @_Z28__spirv_SubgroupShuffleINTELjj +// CHECK: call spir_func noundef i32 @_Z31__spirv_SubgroupShuffleXorINTELjj +// CHECK: call spir_func noundef i32 @_Z30__spirv_SubgroupShuffleUpINTELjjj +// CHECK: call spir_func noundef i32 @_Z32__spirv_SubgroupShuffleDownINTELjjj +// CHECK: call spir_func noundef <2 x i32> @_Z28__spirv_SubgroupShuffleINTELDv2_jj +// CHECK: call spir_func noundef <2 x i32> @_Z31__spirv_SubgroupShuffleXorINTELDv2_jj +// CHECK: call spir_func noundef <2 x i32> @_Z30__spirv_SubgroupShuffleUpINTELDv2_jS_j +// CHECK: call spir_func noundef <2 x i32> @_Z32__spirv_SubgroupShuffleDownINTELDv2_jS_j +// CHECK: call spir_func noundef <3 x i32> 
@_Z28__spirv_SubgroupShuffleINTELDv3_jj +// CHECK: call spir_func noundef <3 x i32> @_Z31__spirv_SubgroupShuffleXorINTELDv3_jj +// CHECK: call spir_func noundef <3 x i32> @_Z30__spirv_SubgroupShuffleUpINTELDv3_jS_j +// CHECK: call spir_func noundef <3 x i32> @_Z32__spirv_SubgroupShuffleDownINTELDv3_jS_j +// CHECK: call spir_func noundef <4 x i32> @_Z28__spirv_SubgroupShuffleINTELDv4_jj +// CHECK: call spir_func noundef <4 x i32> @_Z31__spirv_SubgroupShuffleXorINTELDv4_jj +// CHECK: call spir_func noundef <4 x i32> @_Z30__spirv_SubgroupShuffleUpINTELDv4_jS_j +// CHECK: call spir_func noundef <4 x i32> @_Z32__spirv_SubgroupShuffleDownINTELDv4_jS_j +// CHECK: call spir_func noundef <8 x i32> @_Z28__spirv_SubgroupShuffleINTELDv8_jj +// CHECK: call spir_func noundef <8 x i32> @_Z31__spirv_SubgroupShuffleXorINTELDv8_jj +// CHECK: call spir_func noundef <8 x i32> @_Z30__spirv_SubgroupShuffleUpINTELDv8_jS_j +// CHECK: call spir_func noundef <8 x i32> @_Z32__spirv_SubgroupShuffleDownINTELDv8_jS_j +// CHECK: call spir_func noundef <16 x i32> @_Z28__spirv_SubgroupShuffleINTELDv16_jj +// CHECK: call spir_func noundef <16 x i32> @_Z31__spirv_SubgroupShuffleXorINTELDv16_jj +// CHECK: call spir_func noundef <16 x i32> @_Z30__spirv_SubgroupShuffleUpINTELDv16_jS_j +// CHECK: call spir_func noundef <16 x i32> @_Z32__spirv_SubgroupShuffleDownINTELDv16_jS_j +// CHECK: call spir_func noundef float @_Z28__spirv_SubgroupShuffleINTELfj +// CHECK: call spir_func noundef float @_Z31__spirv_SubgroupShuffleXorINTELfj +// CHECK: call spir_func noundef float @_Z30__spirv_SubgroupShuffleUpINTELffj +// CHECK: call spir_func noundef float @_Z32__spirv_SubgroupShuffleDownINTELffj +// CHECK: call spir_func noundef <2 x float> @_Z28__spirv_SubgroupShuffleINTELDv2_fj +// CHECK: call spir_func noundef <2 x float> @_Z31__spirv_SubgroupShuffleXorINTELDv2_fj +// CHECK: call spir_func noundef <2 x float> @_Z30__spirv_SubgroupShuffleUpINTELDv2_fS_j +// CHECK: call spir_func noundef <2 x float> 
@_Z32__spirv_SubgroupShuffleDownINTELDv2_fS_j +// CHECK: call spir_func noundef <3 x float> @_Z28__spirv_SubgroupShuffleINTELDv3_fj +// CHECK: call spir_func noundef <3 x float> @_Z31__spirv_SubgroupShuffleXorINTELDv3_fj +// CHECK: call spir_func noundef <3 x float> @_Z30__spirv_SubgroupShuffleUpINTELDv3_fS_j +// CHECK: call spir_func noundef <3 x float> @_Z32__spirv_SubgroupShuffleDownINTELDv3_fS_j +// CHECK: call spir_func noundef <4 x float> @_Z28__spirv_SubgroupShuffleINTELDv4_fj +// CHECK: call spir_func noundef <4 x float> @_Z31__spirv_SubgroupShuffleXorINTELDv4_fj +// CHECK: call spir_func noundef <4 x float> @_Z30__spirv_SubgroupShuffleUpINTELDv4_fS_j +// CHECK: call spir_func noundef <4 x float> @_Z32__spirv_SubgroupShuffleDownINTELDv4_fS_j +// CHECK: call spir_func noundef <8 x float> @_Z28__spirv_SubgroupShuffleINTELDv8_fj +// CHECK: call spir_func noundef <8 x float> @_Z31__spirv_SubgroupShuffleXorINTELDv8_fj +// CHECK: call spir_func noundef <8 x float> @_Z30__spirv_SubgroupShuffleUpINTELDv8_fS_j +// CHECK: call spir_func noundef <8 x float> @_Z32__spirv_SubgroupShuffleDownINTELDv8_fS_j +// CHECK: call spir_func noundef <16 x float> @_Z28__spirv_SubgroupShuffleINTELDv16_fj +// CHECK: call spir_func noundef <16 x float> @_Z31__spirv_SubgroupShuffleXorINTELDv16_fj +// CHECK: call spir_func noundef <16 x float> @_Z30__spirv_SubgroupShuffleUpINTELDv16_fS_j +// CHECK: call spir_func noundef <16 x float> @_Z32__spirv_SubgroupShuffleDownINTELDv16_fS_j +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1hh +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1hDv2_h +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1hDv4_h +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1hDv8_h +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3hh +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3hDv2_h +// CHECK: call spir_func void 
@_Z31__spirv_SubgroupBlockWriteINTELPU3AS3hDv4_h +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3hDv8_h +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1hDv16_h +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3hDv16_h +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1tt +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1tDv2_t +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1tDv4_t +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1tDv8_t +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3tt +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3tDv2_t +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3tDv4_t +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3tDv8_t +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1tDv16_t +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3tDv16_t +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1jj +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1jDv2_j +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1jDv4_j +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1jDv8_j +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3jj +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3jDv2_j +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3jDv4_j +// CHECK: call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3jDv8_j diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/group/collectives.cl b/libclc/libspirv/lib/amdgcn-amdhsa/group/collectives.cl index 27218592b2487..910541561950e 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/group/collectives.cl +++ 
b/libclc/libspirv/lib/amdgcn-amdhsa/group/collectives.cl @@ -38,27 +38,6 @@ __clc__get_group_scratch_float() __asm("__clc__get_group_scratch_float"); __local double * __clc__get_group_scratch_double() __asm("__clc__get_group_scratch_double"); -#define __CLC_DECLARE_SHUFFLES(TYPE, TYPE_MANGLED) \ - _CLC_DECL TYPE _Z28__spirv_SubgroupShuffleINTELI##TYPE_MANGLED##ET_S0_j( \ - TYPE, int); \ - _CLC_DECL TYPE \ - _Z30__spirv_SubgroupShuffleUpINTELI##TYPE_MANGLED##ET_S0_S0_j( \ - TYPE, TYPE, unsigned int); - -__CLC_DECLARE_SHUFFLES(char, a); -__CLC_DECLARE_SHUFFLES(unsigned char, h); -__CLC_DECLARE_SHUFFLES(short, s); -__CLC_DECLARE_SHUFFLES(unsigned short, t); -__CLC_DECLARE_SHUFFLES(int, i); -__CLC_DECLARE_SHUFFLES(unsigned int, j); -__CLC_DECLARE_SHUFFLES(half, DF16_); -__CLC_DECLARE_SHUFFLES(float, f); -__CLC_DECLARE_SHUFFLES(long, l); -__CLC_DECLARE_SHUFFLES(unsigned long, m); -__CLC_DECLARE_SHUFFLES(double, d); - -#undef __CLC_DECLARE_SHUFFLES - #define __CLC_APPEND(NAME, SUFFIX) NAME##SUFFIX #define __CLC_ADD(x, y) (x + y) @@ -71,13 +50,11 @@ __CLC_DECLARE_SHUFFLES(double, d); #define __CLC_LOGICAL_OR(x, y) (x || y) #define __CLC_LOGICAL_AND(x, y) (x && y) -#define __CLC_SUBGROUP_COLLECTIVE_BODY(OP, TYPE, TYPE_MANGLED, IDENTITY) \ +#define __CLC_SUBGROUP_COLLECTIVE_BODY(OP, TYPE, IDENTITY) \ uint sg_lid = __spirv_SubgroupLocalInvocationId(); \ /* Can't use XOR/butterfly shuffles; some lanes may be inactive */ \ for (int o = 1; o < __spirv_SubgroupMaxSize(); o *= 2) { \ - TYPE contribution = \ - _Z30__spirv_SubgroupShuffleUpINTELI##TYPE_MANGLED##ET_S0_S0_j(x, x, \ - o); \ + TYPE contribution = __spirv_SubgroupShuffleUpINTEL(x, x, o); \ bool inactive = (sg_lid < o); \ contribution = (inactive) ? 
IDENTITY : contribution; \ x = OP(x, contribution); \ @@ -85,8 +62,7 @@ __CLC_DECLARE_SHUFFLES(double, d); /* For Reduce, broadcast result from highest active lane */ \ TYPE result; \ if (op == Reduce) { \ - result = _Z28__spirv_SubgroupShuffleINTELI##TYPE_MANGLED##ET_S0_j( \ - x, __spirv_SubgroupSize() - 1); \ + result = __spirv_SubgroupShuffleINTEL(x, __spirv_SubgroupSize() - 1); \ *carry = result; \ } /* For InclusiveScan, use results as computed */ \ else if (op == InclusiveScan) { \ @@ -95,101 +71,100 @@ __CLC_DECLARE_SHUFFLES(double, d); } /* For ExclusiveScan, shift and prepend identity */ \ else if (op == ExclusiveScan) { \ *carry = x; \ - result = _Z30__spirv_SubgroupShuffleUpINTELI##TYPE_MANGLED##ET_S0_S0_j( \ - x, x, 1); \ + result = __spirv_SubgroupShuffleUpINTEL(x, x, 1); \ if (sg_lid == 0) { \ result = IDENTITY; \ } \ } \ return result; -#define __CLC_SUBGROUP_COLLECTIVE(NAME, OP, TYPE, TYPE_MANGLED, IDENTITY) \ +#define __CLC_SUBGROUP_COLLECTIVE(NAME, OP, TYPE, IDENTITY) \ _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __CLC_APPEND( \ __clc__Subgroup, NAME)(uint op, TYPE x, TYPE * carry) { \ - __CLC_SUBGROUP_COLLECTIVE_BODY(OP, TYPE, TYPE_MANGLED, IDENTITY) \ + __CLC_SUBGROUP_COLLECTIVE_BODY(OP, TYPE, IDENTITY) \ } -__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, char, a, 0) -__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, uchar, h, 0) -__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, short, s, 0) -__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, ushort, t, 0) -__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, int, i, 0) -__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, uint, j, 0) -__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, long, l, 0) -__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, ulong, m, 0) -__CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, half, DF16_, 0) -__CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, float, f, 0) -__CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, double, d, 0) - -__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, char, a, 1) -__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, uchar, h, 1) 
-__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, short, s, 1) -__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, ushort, t, 1) -__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, int, i, 1) -__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, uint, j, 1) -__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, long, l, 1) -__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, ulong, m, 1) -__CLC_SUBGROUP_COLLECTIVE(FMulKHR, __CLC_MUL, half, DF16_, 1) -__CLC_SUBGROUP_COLLECTIVE(FMulKHR, __CLC_MUL, float, f, 1) -__CLC_SUBGROUP_COLLECTIVE(FMulKHR, __CLC_MUL, double, d, 1) - -__CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, char, a, CHAR_MAX) -__CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, uchar, h, UCHAR_MAX) -__CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, short, s, SHRT_MAX) -__CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, ushort, t, USHRT_MAX) -__CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, int, i, INT_MAX) -__CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, uint, j, UINT_MAX) -__CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, long, l, LONG_MAX) -__CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, ulong, m, ULONG_MAX) -__CLC_SUBGROUP_COLLECTIVE(FMin, __CLC_MIN, half, DF16_, INFINITY) -__CLC_SUBGROUP_COLLECTIVE(FMin, __CLC_MIN, float, f, INFINITY) -__CLC_SUBGROUP_COLLECTIVE(FMin, __CLC_MIN, double, d, INFINITY) - -__CLC_SUBGROUP_COLLECTIVE(SMax, __CLC_MAX, char, a, CHAR_MIN) -__CLC_SUBGROUP_COLLECTIVE(UMax, __CLC_MAX, uchar, h, 0) -__CLC_SUBGROUP_COLLECTIVE(SMax, __CLC_MAX, short, s, SHRT_MIN) -__CLC_SUBGROUP_COLLECTIVE(UMax, __CLC_MAX, ushort, t, 0) -__CLC_SUBGROUP_COLLECTIVE(SMax, __CLC_MAX, int, i, INT_MIN) -__CLC_SUBGROUP_COLLECTIVE(UMax, __CLC_MAX, uint, j, 0) -__CLC_SUBGROUP_COLLECTIVE(SMax, __CLC_MAX, long, l, LONG_MIN) -__CLC_SUBGROUP_COLLECTIVE(UMax, __CLC_MAX, ulong, m, 0) -__CLC_SUBGROUP_COLLECTIVE(FMax, __CLC_MAX, half, DF16_, -INFINITY) -__CLC_SUBGROUP_COLLECTIVE(FMax, __CLC_MAX, float, f, -INFINITY) -__CLC_SUBGROUP_COLLECTIVE(FMax, __CLC_MAX, double, d, -INFINITY) - -__CLC_SUBGROUP_COLLECTIVE(All, __CLC_AND, bool, a, true) 
-__CLC_SUBGROUP_COLLECTIVE(Any, __CLC_OR, bool, a, false) - -__CLC_SUBGROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, uchar, h, ~0) -__CLC_SUBGROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, uchar, h, 0) -__CLC_SUBGROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, uchar, h, 0) -__CLC_SUBGROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, char, a, ~0) -__CLC_SUBGROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, char, a, 0) -__CLC_SUBGROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, char, a, 0) - -__CLC_SUBGROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, ushort, t, ~0) -__CLC_SUBGROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, ushort, t, 0) -__CLC_SUBGROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, ushort, t, 0) -__CLC_SUBGROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, short, s, ~0) -__CLC_SUBGROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, short, s, 0) -__CLC_SUBGROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, short, s, 0) - -__CLC_SUBGROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, uint, j, ~0) -__CLC_SUBGROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, uint, j, 0) -__CLC_SUBGROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, uint, j, 0) -__CLC_SUBGROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, int, i, ~0) -__CLC_SUBGROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, int, i, 0) -__CLC_SUBGROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, int, i, 0) - -__CLC_SUBGROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, ulong, m, ~0l) -__CLC_SUBGROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, ulong, m, 0l) -__CLC_SUBGROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, ulong, m, 0l) -__CLC_SUBGROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, long, l, ~0l) -__CLC_SUBGROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, long, l, 0l) -__CLC_SUBGROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, long, l, 0l) - -__CLC_SUBGROUP_COLLECTIVE(LogicalOrKHR, __CLC_LOGICAL_OR, bool, a, false) -__CLC_SUBGROUP_COLLECTIVE(LogicalAndKHR, __CLC_LOGICAL_AND, bool, a, true) +__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, char, 0) +__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, uchar, 0) +__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, short, 0) +__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, 
ushort, 0) +__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, int, 0) +__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, uint, 0) +__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, long, 0) +__CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, ulong, 0) +__CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, half, 0) +__CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, float, 0) +__CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, double, 0) + +__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, char, 1) +__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, uchar, 1) +__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, short, 1) +__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, ushort, 1) +__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, int, 1) +__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, uint, 1) +__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, long, 1) +__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, ulong, 1) +__CLC_SUBGROUP_COLLECTIVE(FMulKHR, __CLC_MUL, half, 1) +__CLC_SUBGROUP_COLLECTIVE(FMulKHR, __CLC_MUL, float, 1) +__CLC_SUBGROUP_COLLECTIVE(FMulKHR, __CLC_MUL, double, 1) + +__CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, char, CHAR_MAX) +__CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, uchar, UCHAR_MAX) +__CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, short, SHRT_MAX) +__CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, ushort, USHRT_MAX) +__CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, int, INT_MAX) +__CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, uint, UINT_MAX) +__CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, long, LONG_MAX) +__CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, ulong, ULONG_MAX) +__CLC_SUBGROUP_COLLECTIVE(FMin, __CLC_MIN, half, INFINITY) +__CLC_SUBGROUP_COLLECTIVE(FMin, __CLC_MIN, float, INFINITY) +__CLC_SUBGROUP_COLLECTIVE(FMin, __CLC_MIN, double, INFINITY) + +__CLC_SUBGROUP_COLLECTIVE(SMax, __CLC_MAX, char, CHAR_MIN) +__CLC_SUBGROUP_COLLECTIVE(UMax, __CLC_MAX, uchar, 0) +__CLC_SUBGROUP_COLLECTIVE(SMax, __CLC_MAX, short, SHRT_MIN) +__CLC_SUBGROUP_COLLECTIVE(UMax, __CLC_MAX, ushort, 0) +__CLC_SUBGROUP_COLLECTIVE(SMax, __CLC_MAX, int, INT_MIN) 
+__CLC_SUBGROUP_COLLECTIVE(UMax, __CLC_MAX, uint, 0) +__CLC_SUBGROUP_COLLECTIVE(SMax, __CLC_MAX, long, LONG_MIN) +__CLC_SUBGROUP_COLLECTIVE(UMax, __CLC_MAX, ulong, 0) +__CLC_SUBGROUP_COLLECTIVE(FMax, __CLC_MAX, half, -INFINITY) +__CLC_SUBGROUP_COLLECTIVE(FMax, __CLC_MAX, float, -INFINITY) +__CLC_SUBGROUP_COLLECTIVE(FMax, __CLC_MAX, double, -INFINITY) + +__CLC_SUBGROUP_COLLECTIVE(All, __CLC_AND, bool, true) +__CLC_SUBGROUP_COLLECTIVE(Any, __CLC_OR, bool, false) + +__CLC_SUBGROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, uchar, ~0) +__CLC_SUBGROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, uchar, 0) +__CLC_SUBGROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, uchar, 0) +__CLC_SUBGROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, char, ~0) +__CLC_SUBGROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, char, 0) +__CLC_SUBGROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, char, 0) + +__CLC_SUBGROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, ushort, ~0) +__CLC_SUBGROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, ushort, 0) +__CLC_SUBGROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, ushort, 0) +__CLC_SUBGROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, short, ~0) +__CLC_SUBGROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, short, 0) +__CLC_SUBGROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, short, 0) + +__CLC_SUBGROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, uint, ~0) +__CLC_SUBGROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, uint, 0) +__CLC_SUBGROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, uint, 0) +__CLC_SUBGROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, int, ~0) +__CLC_SUBGROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, int, 0) +__CLC_SUBGROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, int, 0) + +__CLC_SUBGROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, ulong, ~0l) +__CLC_SUBGROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, ulong, 0l) +__CLC_SUBGROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, ulong, 0l) +__CLC_SUBGROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, long, ~0l) +__CLC_SUBGROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, long, 0l) +__CLC_SUBGROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, long, 0l) + 
+__CLC_SUBGROUP_COLLECTIVE(LogicalOrKHR, __CLC_LOGICAL_OR, bool, false) +__CLC_SUBGROUP_COLLECTIVE(LogicalAndKHR, __CLC_LOGICAL_AND, bool, true) #undef __CLC_SUBGROUP_COLLECTIVE_BODY #undef __CLC_SUBGROUP_COLLECTIVE @@ -380,10 +355,9 @@ long __clc__3d_to_linear_local_id(ulong3 id) { #define __CLC_GROUP_BROADCAST(TYPE, TYPE_MANGLED) \ _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __spirv_GroupBroadcast( \ - int scope, TYPE x, ulong local_id) { \ + int scope, TYPE x, ulong local_id) { \ if (scope == Subgroup) { \ - return _Z28__spirv_SubgroupShuffleINTELI##TYPE_MANGLED##ET_S0_j( \ - x, local_id); \ + return __spirv_SubgroupShuffleINTEL(x, local_id); \ } \ bool source = (__spirv_LocalInvocationIndex() == local_id); \ __local TYPE *scratch = __CLC_APPEND(__clc__get_group_scratch_, TYPE)(); \ @@ -396,17 +370,17 @@ long __clc__3d_to_linear_local_id(ulong3 id) { return result; \ } \ _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __spirv_GroupBroadcast( \ - int scope, TYPE x, ulong2 local_id) { \ + int scope, TYPE x, ulong2 local_id) { \ ulong linear_local_id = __clc__2d_to_linear_local_id(local_id); \ return __spirv_GroupBroadcast(scope, x, linear_local_id); \ } \ _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __spirv_GroupBroadcast( \ - int scope, TYPE x, ulong3 local_id) { \ + int scope, TYPE x, ulong3 local_id) { \ ulong linear_local_id = __clc__3d_to_linear_local_id(local_id); \ return __spirv_GroupBroadcast(scope, x, linear_local_id); \ } \ _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __spirv_GroupBroadcast( \ - int scope, TYPE x, uint local_id) { \ + int scope, TYPE x, uint local_id) { \ return __spirv_GroupBroadcast(scope, x, (ulong)local_id); \ } __CLC_GROUP_BROADCAST(char, a); diff --git a/libclc/libspirv/lib/amdgcn-amdhsa/misc/sub_group_shuffle.cl b/libclc/libspirv/lib/amdgcn-amdhsa/misc/sub_group_shuffle.cl index 0720eb4b3339c..13ac2ee2ddc24 100644 --- a/libclc/libspirv/lib/amdgcn-amdhsa/misc/sub_group_shuffle.cl +++ 
b/libclc/libspirv/lib/amdgcn-amdhsa/misc/sub_group_shuffle.cl @@ -12,14 +12,29 @@ #define SUBGROUP_SIZE __spirv_SubgroupMaxSize() // Shuffle +_CLC_OVERLOAD _CLC_DEF int +__spirv_SubgroupShuffleINTEL(int Data, unsigned int InvocationId) { + int Index = InvocationId; + return __builtin_amdgcn_ds_bpermute(Index << 2, Data); +} // int __spirv_SubgroupShuffleINTEL(int, unsigned int) _CLC_DEF int _Z28__spirv_SubgroupShuffleINTELIiET_S0_j(int Data, unsigned int InvocationId) { - int Index = InvocationId; - return __builtin_amdgcn_ds_bpermute(Index << 2, Data); + return __spirv_SubgroupShuffleINTEL(Data, InvocationId); } // Sub 32-bit types. +#define __AMDGCN_CLC_SUBGROUP_SUB_I32(TYPE) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_SubgroupShuffleINTEL( \ + TYPE Data, unsigned int InvocationId) { \ + return __spirv_SubgroupShuffleINTEL((int)Data, InvocationId); \ + } +__AMDGCN_CLC_SUBGROUP_SUB_I32(char); +__AMDGCN_CLC_SUBGROUP_SUB_I32(unsigned char); +__AMDGCN_CLC_SUBGROUP_SUB_I32(short); +__AMDGCN_CLC_SUBGROUP_SUB_I32(unsigned short); +#undef __AMDGCN_CLC_SUBGROUP_SUB_I32 + // _Z28__spirv_SubgroupShuffleINTELIaET_S0_j - char // _Z28__spirv_SubgroupShuffleINTELIhET_S0_j - unsigned char // _Z28__spirv_SubgroupShuffleINTELIsET_S0_j - long @@ -28,7 +43,7 @@ _Z28__spirv_SubgroupShuffleINTELIiET_S0_j(int Data, unsigned int InvocationId) { #define __AMDGCN_CLC_SUBGROUP_SUB_I32(TYPE, MANGLED_TYPE_NAME) \ _CLC_DEF TYPE _Z28__spirv_SubgroupShuffleINTELI##MANGLED_TYPE_NAME##ET_S0_j( \ TYPE Data, unsigned int InvocationId) { \ - return _Z28__spirv_SubgroupShuffleINTELIiET_S0_j(Data, InvocationId); \ + return __spirv_SubgroupShuffleINTEL(Data, InvocationId); \ } __AMDGCN_CLC_SUBGROUP_SUB_I32(char, a); __AMDGCN_CLC_SUBGROUP_SUB_I32(unsigned char, h); @@ -37,42 +52,64 @@ __AMDGCN_CLC_SUBGROUP_SUB_I32(unsigned short, t); #ifdef cl_khr_fp16 #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEF half _Z28__spirv_SubgroupShuffleINTELIDF16_ET_S0_j( - half Data, unsigned int InvocationId) { 
+_CLC_OVERLOAD _CLC_DEF half +__spirv_SubgroupShuffleINTEL(half Data, unsigned int InvocationId) { unsigned short tmp = __clc_as_ushort(Data); - tmp = (unsigned short)_Z28__spirv_SubgroupShuffleINTELIiET_S0_j((int)tmp, - InvocationId); + tmp = __spirv_SubgroupShuffleINTEL(tmp, InvocationId); return __clc_as_half(tmp); } +_CLC_DEF half _Z28__spirv_SubgroupShuffleINTELIDF16_ET_S0_j( + half Data, unsigned int InvocationId) { + return __spirv_SubgroupShuffleINTEL(Data, InvocationId); +} #endif // cl_khr_fp16 #undef __AMDGCN_CLC_SUBGROUP_SUB_I32 // 32-bit types. +#define __AMDGCN_CLC_SUBGROUP_I32(TYPE, CAST_TYPE) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_SubgroupShuffleINTEL( \ + TYPE Data, unsigned int InvocationId) { \ + return __builtin_astype( \ + __spirv_SubgroupShuffleINTEL(__clc_as_int(Data), InvocationId), \ + CAST_TYPE); \ + } +__AMDGCN_CLC_SUBGROUP_I32(unsigned int, uint); +__AMDGCN_CLC_SUBGROUP_I32(float, float); +#undef __AMDGCN_CLC_SUBGROUP_I32 + // __spirv_SubgroupShuffleINTEL - unsigned int // __spirv_SubgroupShuffleINTEL- float -#define __AMDGCN_CLC_SUBGROUP_I32(TYPE, CAST_TYPE, MANGLED_TYPE_NAME) \ - _CLC_DEF TYPE _Z28__spirv_SubgroupShuffleINTELI##MANGLED_TYPE_NAME##ET_S0_j( \ - TYPE Data, unsigned int InvocationId) { \ - return __builtin_astype( \ - _Z28__spirv_SubgroupShuffleINTELIiET_S0_j(__clc_as_int(Data), InvocationId), \ - CAST_TYPE); \ +#define __AMDGCN_CLC_SUBGROUP_I32(TYPE, CAST_TYPE, MANGLED_TYPE_NAME) \ + _CLC_DEF TYPE _Z28__spirv_SubgroupShuffleINTELI##MANGLED_TYPE_NAME##ET_S0_j( \ + TYPE Data, unsigned int InvocationId) { \ + return __spirv_SubgroupShuffleINTEL(Data, InvocationId); \ } __AMDGCN_CLC_SUBGROUP_I32(unsigned int, uint, j); __AMDGCN_CLC_SUBGROUP_I32(float, float, f); #undef __AMDGCN_CLC_SUBGROUP_I32 // 64-bit types. 
+#define __AMDGCN_CLC_SUBGROUP_I64(TYPE, CAST_TYPE) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_SubgroupShuffleINTEL( \ + TYPE Data, unsigned int InvocationId) { \ + int2 tmp = __clc_as_int2(Data); \ + tmp.lo = __spirv_SubgroupShuffleINTEL(tmp.lo, InvocationId); \ + tmp.hi = __spirv_SubgroupShuffleINTEL(tmp.hi, InvocationId); \ + return __builtin_astype(tmp, CAST_TYPE); \ + } +__AMDGCN_CLC_SUBGROUP_I64(long, long); +__AMDGCN_CLC_SUBGROUP_I64(unsigned long, ulong); +__AMDGCN_CLC_SUBGROUP_I64(double, double); +#undef __AMDGCN_CLC_SUBGROUP_I64 + // __spirv_SubgroupShuffleINTEL - long // __spirv_SubgroupShuffleINTEL - unsigned long // __spirv_SubgroupShuffleINTEL - double #define __AMDGCN_CLC_SUBGROUP_I64(TYPE, CAST_TYPE, MANGLED_TYPE_NAME) \ _CLC_DEF TYPE _Z28__spirv_SubgroupShuffleINTELI##MANGLED_TYPE_NAME##ET_S0_j( \ TYPE Data, unsigned int InvocationId) { \ - int2 tmp = __clc_as_int2(Data); \ - tmp.lo = _Z28__spirv_SubgroupShuffleINTELIiET_S0_j(tmp.lo, InvocationId); \ - tmp.hi = _Z28__spirv_SubgroupShuffleINTELIiET_S0_j(tmp.hi, InvocationId); \ - return __builtin_astype(tmp, CAST_TYPE); \ + return __spirv_SubgroupShuffleINTEL(Data, InvocationId); \ } __AMDGCN_CLC_SUBGROUP_I64(long, long, l); __AMDGCN_CLC_SUBGROUP_I64(unsigned long, ulong, m); @@ -80,16 +117,76 @@ __AMDGCN_CLC_SUBGROUP_I64(double, double, d); #undef __AMDGCN_CLC_SUBGROUP_I64 // Vector types. 
+#define __AMDGCN_CLC_SUBGROUP_TO_VEC(TYPE, NUM_ELEMS) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_SubgroupShuffleINTEL( \ + TYPE Data, unsigned int InvocationId) { \ + TYPE res; \ + for (int i = 0; i < NUM_ELEMS; ++i) { \ + res[i] = __spirv_SubgroupShuffleINTEL(Data[i], InvocationId); \ + } \ + return res; \ + } + +// [u]char +__AMDGCN_CLC_SUBGROUP_TO_VEC(char2, 2) +__AMDGCN_CLC_SUBGROUP_TO_VEC(char4, 4) +__AMDGCN_CLC_SUBGROUP_TO_VEC(char8, 8) +__AMDGCN_CLC_SUBGROUP_TO_VEC(char16, 16) +__AMDGCN_CLC_SUBGROUP_TO_VEC(uchar2, 2) +__AMDGCN_CLC_SUBGROUP_TO_VEC(uchar4, 4) +__AMDGCN_CLC_SUBGROUP_TO_VEC(uchar8, 8) +__AMDGCN_CLC_SUBGROUP_TO_VEC(uchar16, 16) +// [u]short +__AMDGCN_CLC_SUBGROUP_TO_VEC(short2, 2) +__AMDGCN_CLC_SUBGROUP_TO_VEC(short4, 4) +__AMDGCN_CLC_SUBGROUP_TO_VEC(short8, 8) +__AMDGCN_CLC_SUBGROUP_TO_VEC(short16, 16) +__AMDGCN_CLC_SUBGROUP_TO_VEC(ushort2, 2) +__AMDGCN_CLC_SUBGROUP_TO_VEC(ushort4, 4) +__AMDGCN_CLC_SUBGROUP_TO_VEC(ushort8, 8) +__AMDGCN_CLC_SUBGROUP_TO_VEC(ushort16, 16) +// [u]int +__AMDGCN_CLC_SUBGROUP_TO_VEC(int2, 2) +__AMDGCN_CLC_SUBGROUP_TO_VEC(int4, 4) +__AMDGCN_CLC_SUBGROUP_TO_VEC(int8, 8) +__AMDGCN_CLC_SUBGROUP_TO_VEC(int16, 16) +__AMDGCN_CLC_SUBGROUP_TO_VEC(uint2, 2) +__AMDGCN_CLC_SUBGROUP_TO_VEC(uint4, 4) +__AMDGCN_CLC_SUBGROUP_TO_VEC(uint8, 8) +__AMDGCN_CLC_SUBGROUP_TO_VEC(uint16, 16) +// [u]long +__AMDGCN_CLC_SUBGROUP_TO_VEC(long2, 2) +__AMDGCN_CLC_SUBGROUP_TO_VEC(long4, 4) +__AMDGCN_CLC_SUBGROUP_TO_VEC(long8, 8) +__AMDGCN_CLC_SUBGROUP_TO_VEC(long16, 16) +__AMDGCN_CLC_SUBGROUP_TO_VEC(ulong2, 2) +__AMDGCN_CLC_SUBGROUP_TO_VEC(ulong4, 4) +__AMDGCN_CLC_SUBGROUP_TO_VEC(ulong8, 8) +__AMDGCN_CLC_SUBGROUP_TO_VEC(ulong16, 16) +// half +#ifdef cl_khr_fp16 +__AMDGCN_CLC_SUBGROUP_TO_VEC(half2, 2) +__AMDGCN_CLC_SUBGROUP_TO_VEC(half4, 4) +__AMDGCN_CLC_SUBGROUP_TO_VEC(half8, 8) +__AMDGCN_CLC_SUBGROUP_TO_VEC(half16, 16) +#endif // cl_khr_fp16 +// float +__AMDGCN_CLC_SUBGROUP_TO_VEC(float2, 2) +__AMDGCN_CLC_SUBGROUP_TO_VEC(float4, 4) 
+__AMDGCN_CLC_SUBGROUP_TO_VEC(float8, 8) +__AMDGCN_CLC_SUBGROUP_TO_VEC(float16, 16) +// double +__AMDGCN_CLC_SUBGROUP_TO_VEC(double2, 2) +__AMDGCN_CLC_SUBGROUP_TO_VEC(double4, 4) +__AMDGCN_CLC_SUBGROUP_TO_VEC(double8, 8) +__AMDGCN_CLC_SUBGROUP_TO_VEC(double16, 16) +#undef __AMDGCN_CLC_SUBGROUP_TO_VEC + #define __AMDGCN_CLC_SUBGROUP_TO_VEC(TYPE, MANGLED_SCALAR_TY, NUM_ELEMS) \ _CLC_DEF TYPE \ _Z28__spirv_SubgroupShuffleINTELIDv##NUM_ELEMS##_##MANGLED_SCALAR_TY##ET_S1_j( \ TYPE Data, unsigned int InvocationId) { \ - TYPE res; \ - for (int i = 0; i < NUM_ELEMS; ++i) { \ - res[i] = _Z28__spirv_SubgroupShuffleINTELI##MANGLED_SCALAR_TY##ET_S0_j( \ - Data[i], InvocationId); \ - } \ - return res; \ + return __spirv_SubgroupShuffleINTEL(Data, InvocationId); \ } // [u]char @@ -148,18 +245,41 @@ __AMDGCN_CLC_SUBGROUP_TO_VEC(double16, d, 16) #undef __AMDGCN_CLC_SUBGROUP_TO_VEC // Shuffle XOR -// int __spirv_SubgroupShuffleXorINTEL(int, unsigned int) -_CLC_DEF int -_Z31__spirv_SubgroupShuffleXorINTELIiET_S0_j(int Data, - unsigned int InvocationId) { +_CLC_OVERLOAD _CLC_DEF int +__spirv_SubgroupShuffleXorINTEL(int Data, unsigned int InvocationId) { int self = SELF; unsigned int index = self ^ InvocationId; index = index >= ((self + SUBGROUP_SIZE) & ~(SUBGROUP_SIZE - 1)) ? self : index; return __builtin_amdgcn_ds_bpermute(index << 2, Data); } +// int __spirv_SubgroupShuffleXorINTEL(int, unsigned int) +_CLC_DEF int +_Z31__spirv_SubgroupShuffleXorINTELIiET_S0_j(int Data, + unsigned int InvocationId) { + return __spirv_SubgroupShuffleXorINTEL(Data, InvocationId); +} // Sub 32-bit types. 
+#define __AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(TYPE)                               \
+  _CLC_OVERLOAD _CLC_DEF TYPE __spirv_SubgroupShuffleXorINTEL(                \
+      TYPE Data, unsigned int InvocationId) {                                 \
+    return __spirv_SubgroupShuffleXorINTEL((int)Data, InvocationId);          \
+  }
+__AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(char);
+__AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(unsigned char);
+__AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(short);
+__AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(unsigned short);
+#ifdef cl_khr_fp16
+_CLC_OVERLOAD _CLC_DEF half
+__spirv_SubgroupShuffleXorINTEL(half Data, unsigned int InvocationId) {
+  unsigned short tmp = __clc_as_ushort(Data);
+  tmp = (unsigned short)__spirv_SubgroupShuffleXorINTEL(tmp, InvocationId);
+  return __clc_as_half(tmp);
+}
+#endif // cl_khr_fp16
+#undef __AMDGCN_CLC_SUBGROUP_XOR_SUB_I32
+
 // _Z31__spirv_SubgroupShuffleXorINTELIaET_S0_j - char
 // _Z31__spirv_SubgroupShuffleXorINTELIhET_S0_j - unsigned char
 // _Z31__spirv_SubgroupShuffleXorINTELIsET_S0_j - short
@@ -169,7 +289,7 @@ _Z31__spirv_SubgroupShuffleXorINTELIiET_S0_j(int Data,
   _CLC_DEF TYPE                                                               \
   _Z31__spirv_SubgroupShuffleXorINTELI##MANGLED_TYPE_NAME##ET_S0_j(           \
       TYPE Data, unsigned int InvocationId) {                                 \
-    return _Z31__spirv_SubgroupShuffleXorINTELIiET_S0_j(Data, InvocationId);  \
+    return __spirv_SubgroupShuffleXorINTEL(Data, InvocationId);               \
  }
 __AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(char, a);
 __AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(unsigned char, h);
@@ -178,30 +298,49 @@ __AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(unsigned short, t);
 #ifdef cl_khr_fp16
 _CLC_DEF half _Z31__spirv_SubgroupShuffleXorINTELIDF16_ET_S0_j(
     half Data, unsigned int InvocationId) {
-  unsigned short tmp = __clc_as_ushort(Data);
-  tmp = (unsigned short)_Z31__spirv_SubgroupShuffleXorINTELIiET_S0_j(
-      (int)tmp, InvocationId);
-  return __clc_as_half(tmp);
+  return __spirv_SubgroupShuffleXorINTEL(Data, InvocationId);
 }
 #endif // cl_khr_fp16
 #undef __AMDGCN_CLC_SUBGROUP_XOR_SUB_I32
 
 // 32-bit types.
+#define __AMDGCN_CLC_SUBGROUP_XOR_I32(TYPE, CAST_TYPE) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_SubgroupShuffleXorINTEL( \ + TYPE Data, unsigned int InvocationId) { \ + return __builtin_astype( \ + __spirv_SubgroupShuffleXorINTEL(__clc_as_int(Data), InvocationId), \ + CAST_TYPE); \ + } +__AMDGCN_CLC_SUBGROUP_XOR_I32(unsigned int, uint); +__AMDGCN_CLC_SUBGROUP_XOR_I32(float, float); +#undef __AMDGCN_CLC_SUBGROUP_XOR_I32 + // __spirv_SubgroupShuffleXorINTEL - unsigned int // __spirv_SubgroupShuffleXorINTEL - float #define __AMDGCN_CLC_SUBGROUP_XOR_I32(TYPE, CAST_TYPE, MANGLED_TYPE_NAME) \ _CLC_DEF TYPE \ _Z31__spirv_SubgroupShuffleXorINTELI##MANGLED_TYPE_NAME##ET_S0_j( \ TYPE Data, unsigned int InvocationId) { \ - return __builtin_astype(_Z31__spirv_SubgroupShuffleXorINTELIiET_S0_j( \ - __clc_as_int(Data), InvocationId), \ - CAST_TYPE); \ + return __spirv_SubgroupShuffleXorINTEL(Data, InvocationId); \ } __AMDGCN_CLC_SUBGROUP_XOR_I32(unsigned int, uint, j); __AMDGCN_CLC_SUBGROUP_XOR_I32(float, float, f); #undef __AMDGCN_CLC_SUBGROUP_XOR_I32 // 64-bit types. 
+#define __AMDGCN_CLC_SUBGROUP_XOR_I64(TYPE, CAST_TYPE) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_SubgroupShuffleXorINTEL( \ + TYPE Data, unsigned int InvocationId) { \ + int2 tmp = __clc_as_int2(Data); \ + tmp.lo = __spirv_SubgroupShuffleXorINTEL(tmp.lo, InvocationId); \ + tmp.hi = __spirv_SubgroupShuffleXorINTEL(tmp.hi, InvocationId); \ + return __builtin_astype(tmp, CAST_TYPE); \ + } +__AMDGCN_CLC_SUBGROUP_XOR_I64(long, long); +__AMDGCN_CLC_SUBGROUP_XOR_I64(unsigned long, ulong); +__AMDGCN_CLC_SUBGROUP_XOR_I64(double, double); +#undef __AMDGCN_CLC_SUBGROUP_XOR_I64 + // __spirv_SubgroupShuffleXorINTEL - long // __spirv_SubgroupShuffleXorINTEL - unsigned long // __spirv_SubgroupShuffleXorINTEL - double @@ -209,12 +348,7 @@ __AMDGCN_CLC_SUBGROUP_XOR_I32(float, float, f); _CLC_DEF TYPE \ _Z31__spirv_SubgroupShuffleXorINTELI##MANGLED_TYPE_NAME##ET_S0_j( \ TYPE Data, unsigned int InvocationId) { \ - int2 tmp = __clc_as_int2(Data); \ - tmp.lo = \ - _Z31__spirv_SubgroupShuffleXorINTELIiET_S0_j(tmp.lo, InvocationId); \ - tmp.hi = \ - _Z31__spirv_SubgroupShuffleXorINTELIiET_S0_j(tmp.hi, InvocationId); \ - return __builtin_astype(tmp, CAST_TYPE); \ + return __spirv_SubgroupShuffleXorINTEL(Data, InvocationId); \ } __AMDGCN_CLC_SUBGROUP_XOR_I64(long, long, l); __AMDGCN_CLC_SUBGROUP_XOR_I64(unsigned long, ulong, m); @@ -222,17 +356,76 @@ __AMDGCN_CLC_SUBGROUP_XOR_I64(double, double, d); #undef __AMDGCN_CLC_SUBGROUP_XOR_I64 // Vector types. 
+ +#define __AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(TYPE, NUM_ELEMS) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_SubgroupShuffleXorINTEL( \ + TYPE Data, unsigned int InvocationId) { \ + TYPE res; \ + for (int i = 0; i < NUM_ELEMS; ++i) { \ + res[i] = __spirv_SubgroupShuffleXorINTEL(Data[i], InvocationId); \ + } \ + return res; \ + } +// [u]char +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(char2, 2) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(char4, 4) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(char8, 8) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(char16, 16) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(uchar2, 2) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(uchar4, 4) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(uchar8, 8) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(uchar16, 16) +// [u]short +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(short2, 2) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(short4, 4) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(short8, 8) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(short16, 16) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(ushort2, 2) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(ushort4, 4) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(ushort8, 8) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(ushort16, 16) +// [u]int +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(int2, 2) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(int4, 4) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(int8, 8) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(int16, 16) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(uint2, 2) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(uint4, 4) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(uint8, 8) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(uint16, 16) +// [u]long +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(long2, 2) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(long4, 4) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(long8, 8) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(long16, 16) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(ulong2, 2) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(ulong4, 4) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(ulong8, 8) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(ulong16, 16) +// float +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(float2, 2) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(float4, 4) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(float8, 8) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(float16, 
16) +// half +#ifdef cl_khr_fp16 +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(half2, 2) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(half4, 4) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(half8, 8) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(half16, 16) +#endif // cl_khr_fp16 +// double +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(double2, 2) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(double4, 4) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(double8, 8) +__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(double16, 16) +#undef __AMDGCN_CLC_SUBGROUP_XOR_TO_VEC + #define __AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(TYPE, MANGLED_SCALAR_TY, NUM_ELEMS) \ _CLC_DEF TYPE \ _Z31__spirv_SubgroupShuffleXorINTELIDv##NUM_ELEMS##_##MANGLED_SCALAR_TY##ET_S1_j( \ TYPE Data, unsigned int InvocationId) { \ - TYPE res; \ - for (int i = 0; i < NUM_ELEMS; ++i) { \ - res[i] = \ - _Z31__spirv_SubgroupShuffleXorINTELI##MANGLED_SCALAR_TY##ET_S0_j( \ - Data[i], InvocationId); \ - } \ - return res; \ + return __spirv_SubgroupShuffleXorINTEL(Data, InvocationId); \ } // [u]char __AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(char2, a, 2) @@ -290,10 +483,8 @@ __AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(double16, d, 16) #undef __AMDGCN_CLC_SUBGROUP_XOR_TO_VEC // Shuffle Up -// int __spirv_SubgroupShuffleUpINTEL(int, int, unsigned int) -_CLC_DEF int -_Z30__spirv_SubgroupShuffleUpINTELIiET_S0_S0_j(int previous, int current, - unsigned int delta) { +_CLC_OVERLOAD _CLC_DEF int +__spirv_SubgroupShuffleUpINTEL(int previous, int current, unsigned int delta) { int self = SELF; int size = SUBGROUP_SIZE; @@ -313,8 +504,36 @@ _Z30__spirv_SubgroupShuffleUpINTELIiET_S0_S0_j(int previous, int current, return __builtin_amdgcn_ds_bpermute(index << 2, val); } +// int __spirv_SubgroupShuffleUpINTEL(int, int, unsigned int) +_CLC_DEF int +_Z30__spirv_SubgroupShuffleUpINTELIiET_S0_S0_j(int previous, int current, + unsigned int delta) { + return __spirv_SubgroupShuffleUpINTEL(previous, current, delta); +} // Sub 32-bit types. 
+#define __AMDGCN_CLC_SUBGROUP_UP_SUB_I32(TYPE) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_SubgroupShuffleUpINTEL( \ + TYPE previous, TYPE current, unsigned int delta) { \ + return __spirv_SubgroupShuffleUpINTEL((int)previous, (int)current, delta); \ + } +__AMDGCN_CLC_SUBGROUP_UP_SUB_I32(char); +__AMDGCN_CLC_SUBGROUP_UP_SUB_I32(unsigned char); +__AMDGCN_CLC_SUBGROUP_UP_SUB_I32(short); +__AMDGCN_CLC_SUBGROUP_UP_SUB_I32(unsigned short); +// half +#ifdef cl_khr_fp16 +_CLC_OVERLOAD _CLC_DEF half __spirv_SubgroupShuffleUpINTEL(half previous, + half current, + unsigned int delta) { + unsigned short tmpP = __clc_as_ushort(previous); + unsigned short tmpC = __clc_as_ushort(current); + tmpC = __spirv_SubgroupShuffleUpINTEL(tmpP, tmpC, delta); + return __clc_as_half(tmpC); +} +#endif // cl_khr_fp16 +#undef __AMDGCN_CLC_SUBGROUP_UP_SUB_I32 + // _Z30__spirv_SubgroupShuffleUpINTELIaET_S0_S0_j - char // _Z30__spirv_SubgroupShuffleUpINTELIhET_S0_S0_j - unsigned char // _Z30__spirv_SubgroupShuffleUpINTELIsET_S0_S0_j - short @@ -324,8 +543,7 @@ _Z30__spirv_SubgroupShuffleUpINTELIiET_S0_S0_j(int previous, int current, _CLC_DEF TYPE \ _Z30__spirv_SubgroupShuffleUpINTELI##MANGLED_TYPE_NAME##ET_S0_S0_j( \ TYPE previous, TYPE current, unsigned int delta) { \ - return _Z30__spirv_SubgroupShuffleUpINTELIiET_S0_S0_j(previous, current, \ - delta); \ + return __spirv_SubgroupShuffleUpINTEL(previous, current, delta); \ } __AMDGCN_CLC_SUBGROUP_UP_SUB_I32(char, a); __AMDGCN_CLC_SUBGROUP_UP_SUB_I32(unsigned char, h); @@ -335,32 +553,54 @@ __AMDGCN_CLC_SUBGROUP_UP_SUB_I32(unsigned short, t); #ifdef cl_khr_fp16 _CLC_DEF half _Z30__spirv_SubgroupShuffleUpINTELIDF16_ET_S0_S0_j( half previous, half current, unsigned int delta) { - unsigned short tmpP = __clc_as_ushort(previous); - unsigned short tmpC = __clc_as_ushort(current); - tmpC = (unsigned short)_Z30__spirv_SubgroupShuffleUpINTELIiET_S0_S0_j( - (int)tmpP, (int)tmpC, delta); - return __clc_as_half(tmpC); + return 
__spirv_SubgroupShuffleUpINTEL(previous, current, delta); } #endif // cl_khr_fp16 #undef __AMDGCN_CLC_SUBGROUP_UP_SUB_I32 // 32-bit types. +#define __AMDGCN_CLC_SUBGROUP_UP_I32(TYPE, CAST_TYPE) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_SubgroupShuffleUpINTEL( \ + TYPE previous, TYPE current, unsigned int delta) { \ + return __builtin_astype( \ + __spirv_SubgroupShuffleUpINTEL(__clc_as_int(previous), \ + __clc_as_int(current), delta), \ + CAST_TYPE); \ + } +__AMDGCN_CLC_SUBGROUP_UP_I32(unsigned int, uint); +__AMDGCN_CLC_SUBGROUP_UP_I32(float, float); +#undef __AMDGCN_CLC_SUBGROUP_UP_I32 + // __spirv_SubgroupShuffleUpINTELi - unsigned int // __spirv_SubgroupShuffleUpINTELi - float #define __AMDGCN_CLC_SUBGROUP_UP_I32(TYPE, CAST_TYPE, MANGLED_TYPE_NAME) \ _CLC_DEF TYPE \ _Z30__spirv_SubgroupShuffleUpINTELI##MANGLED_TYPE_NAME##ET_S0_S0_j( \ TYPE previous, TYPE current, unsigned int delta) { \ - return __builtin_astype(_Z30__spirv_SubgroupShuffleUpINTELIiET_S0_S0_j( \ - __clc_as_int(previous), __clc_as_int(current), \ - delta), \ - CAST_TYPE); \ + return __spirv_SubgroupShuffleUpINTEL(previous, current, delta); \ } __AMDGCN_CLC_SUBGROUP_UP_I32(unsigned int, uint, j); __AMDGCN_CLC_SUBGROUP_UP_I32(float, float, f); #undef __AMDGCN_CLC_SUBGROUP_UP_I32 // 64-bit types. 
+#define __AMDGCN_CLC_SUBGROUP_UP_I64(TYPE, CAST_TYPE) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_SubgroupShuffleUpINTEL( \ + TYPE previous, TYPE current, unsigned int delta) { \ + int2 tmp_previous = __clc_as_int2(previous); \ + int2 tmp_current = __clc_as_int2(current); \ + int2 ret; \ + ret.lo = __spirv_SubgroupShuffleUpINTEL(tmp_previous.lo, tmp_current.lo, \ + delta); \ + ret.hi = __spirv_SubgroupShuffleUpINTEL(tmp_previous.hi, tmp_current.hi, \ + delta); \ + return __builtin_astype(ret, CAST_TYPE); \ + } +__AMDGCN_CLC_SUBGROUP_UP_I64(long, long); +__AMDGCN_CLC_SUBGROUP_UP_I64(unsigned long, ulong); +__AMDGCN_CLC_SUBGROUP_UP_I64(double, double); +#undef __AMDGCN_CLC_SUBGROUP_UP_I64 + // __spirv_SubgroupShuffleUpINTEL - long // __spirv_SubgroupShuffleUpINTEL - unsigned long // __spirv_SubgroupShuffleUpINTEL - double @@ -368,14 +608,7 @@ __AMDGCN_CLC_SUBGROUP_UP_I32(float, float, f); _CLC_DEF TYPE \ _Z30__spirv_SubgroupShuffleUpINTELI##MANGLED_TYPE_NAME##ET_S0_S0_j( \ TYPE previous, TYPE current, unsigned int delta) { \ - int2 tmp_previous = __clc_as_int2(previous); \ - int2 tmp_current = __clc_as_int2(current); \ - int2 ret; \ - ret.lo = _Z30__spirv_SubgroupShuffleUpINTELIiET_S0_S0_j( \ - tmp_previous.lo, tmp_current.lo, delta); \ - ret.hi = _Z30__spirv_SubgroupShuffleUpINTELIiET_S0_S0_j( \ - tmp_previous.hi, tmp_current.hi, delta); \ - return __builtin_astype(ret, CAST_TYPE); \ + return __spirv_SubgroupShuffleUpINTEL(previous, current, delta); \ } __AMDGCN_CLC_SUBGROUP_UP_I64(long, long, l); __AMDGCN_CLC_SUBGROUP_UP_I64(unsigned long, ulong, m); @@ -383,17 +616,75 @@ __AMDGCN_CLC_SUBGROUP_UP_I64(double, double, d); #undef __AMDGCN_CLC_SUBGROUP_UP_I64 // Vector types. 
+#define __AMDGCN_CLC_SUBGROUP_UP_TO_VEC(TYPE, NUM_ELEMS) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_SubgroupShuffleUpINTEL( \ + TYPE previous, TYPE current, unsigned int delta) { \ + TYPE res; \ + for (int i = 0; i < NUM_ELEMS; ++i) { \ + res[i] = __spirv_SubgroupShuffleUpINTEL(previous[i], current[i], delta); \ + } \ + return res; \ + } +// [u]char +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(char2, 2) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(char4, 4) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(char8, 8) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(char16, 16) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(uchar2, 2) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(uchar4, 4) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(uchar8, 8) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(uchar16, 16) +// [u]short +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(short2, 2) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(short4, 4) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(short8, 8) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(short16, 16) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(ushort2, 2) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(ushort4, 4) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(ushort8, 8) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(ushort16, 16) +// [u]int +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(int2, 2) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(int4, 4) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(int8, 8) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(int16, 16) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(uint2, 2) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(uint4, 4) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(uint8, 8) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(uint16, 16) +// [u]long +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(long2, 2) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(long4, 4) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(long8, 8) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(long16, 16) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(ulong2, 2) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(ulong4, 4) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(ulong8, 8) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(ulong16, 16) +// half +#ifdef cl_khr_fp16 +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(half2, 2) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(half4, 4) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(half8, 8) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(half16, 16) 
+#endif // cl_khr_fp16 +// float +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(float2, 2) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(float4, 4) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(float8, 8) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(float16, 16) +// double +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(double2, 2) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(double4, 4) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(double8, 8) +__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(double16, 16) +#undef __AMDGCN_CLC_SUBGROUP_UP_TO_VEC + #define __AMDGCN_CLC_SUBGROUP_UP_TO_VEC(TYPE, MANGLED_SCALAR_TY, NUM_ELEMS) \ _CLC_DEF TYPE \ _Z30__spirv_SubgroupShuffleUpINTELIDv##NUM_ELEMS##_##MANGLED_SCALAR_TY##ET_S1_S1_j( \ TYPE previous, TYPE current, unsigned int delta) { \ - TYPE res; \ - for (int i = 0; i < NUM_ELEMS; ++i) { \ - res[i] = \ - _Z30__spirv_SubgroupShuffleUpINTELI##MANGLED_SCALAR_TY##ET_S0_S0_j( \ - previous[i], current[i], delta); \ - } \ - return res; \ + return __spirv_SubgroupShuffleUpINTEL(previous, current, delta); \ } // [u]char __AMDGCN_CLC_SUBGROUP_UP_TO_VEC(char2, a, 2) @@ -451,10 +742,8 @@ __AMDGCN_CLC_SUBGROUP_UP_TO_VEC(double16, d, 16) #undef __AMDGCN_CLC_SUBGROUP_UP_TO_VEC // Shuffle Down -// int __spirv_SubgroupShuffleDownINTEL(int, int, unsigned int) -_CLC_DEF int -_Z32__spirv_SubgroupShuffleDownINTELIiET_S0_S0_j(int current, int next, - unsigned int delta) { +_CLC_OVERLOAD _CLC_DEF int +__spirv_SubgroupShuffleDownINTEL(int current, int next, unsigned int delta) { int self = SELF; int size = SUBGROUP_SIZE; @@ -474,8 +763,35 @@ _Z32__spirv_SubgroupShuffleDownINTELIiET_S0_S0_j(int current, int next, return __builtin_amdgcn_ds_bpermute(index << 2, val); } +// int __spirv_SubgroupShuffleDownINTEL(int, int, unsigned int) +_CLC_DEF int +_Z32__spirv_SubgroupShuffleDownINTELIiET_S0_S0_j(int current, int next, + unsigned int delta) { + return __spirv_SubgroupShuffleDownINTEL(current, next, delta); +} // Sub 32-bit types. 
+#define __AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(TYPE) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_SubgroupShuffleDownINTEL( \ + TYPE current, TYPE next, unsigned int delta) { \ + return __spirv_SubgroupShuffleDownINTEL((int)current, (int)next, delta); \ + } +__AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(char); +__AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(unsigned char); +__AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(short); +__AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(unsigned short); +// half +#ifdef cl_khr_fp16 +_CLC_OVERLOAD _CLC_DEF half +__spirv_SubgroupShuffleDownINTEL(half current, half next, unsigned int delta) { + unsigned short tmpC = __clc_as_ushort(current); + unsigned short tmpN = __clc_as_ushort(next); + tmpC = __spirv_SubgroupShuffleDownINTEL(tmpC, tmpN, delta); + return __clc_as_half(tmpC); +} +#endif // cl_khr_fp16 +#undef __AMDGCN_CLC_SUBGROUP_DOWN_TO_I32 + // _Z32__spirv_SubgroupShuffleDownINTELIaET_S0_S0_j - char // _Z32__spirv_SubgroupShuffleDownINTELIhET_S0_S0_j - unsigned char // _Z32__spirv_SubgroupShuffleDownINTELIsET_S0_S0_j - short @@ -485,8 +801,7 @@ _Z32__spirv_SubgroupShuffleDownINTELIiET_S0_S0_j(int current, int next, _CLC_DEF TYPE \ _Z32__spirv_SubgroupShuffleDownINTELI##MANGLED_TYPE_NAME##ET_S0_S0_j( \ TYPE current, TYPE next, unsigned int delta) { \ - return _Z32__spirv_SubgroupShuffleDownINTELIiET_S0_S0_j(current, next, \ - delta); \ + return __spirv_SubgroupShuffleDownINTEL(current, next, delta); \ } __AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(char, a); __AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(unsigned char, h); @@ -496,44 +811,60 @@ __AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(unsigned short, t); #ifdef cl_khr_fp16 _CLC_DEF half _Z32__spirv_SubgroupShuffleDownINTELIDF16_ET_S0_S0_j( half current, half next, unsigned int delta) { - unsigned short tmpC = __clc_as_ushort(current); - unsigned short tmpN = __clc_as_ushort(next); - tmpC = (unsigned short)_Z32__spirv_SubgroupShuffleDownINTELIiET_S0_S0_j( - (int)tmpC, (int)tmpN, delta); - return __clc_as_half(tmpC); + return __spirv_SubgroupShuffleDownINTEL(current, 
next, delta); } #endif // cl_khr_fp16 #undef __AMDGCN_CLC_SUBGROUP_DOWN_TO_I32 // 32-bit types. +#define __AMDGCN_CLC_SUBGROUP_DOWN_I32(TYPE, CAST_TYPE) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_SubgroupShuffleDownINTEL( \ + TYPE current, TYPE next, unsigned int delta) { \ + return __builtin_astype( \ + __spirv_SubgroupShuffleDownINTEL(__clc_as_int(current), \ + __clc_as_int(next), delta), \ + CAST_TYPE); \ + } +__AMDGCN_CLC_SUBGROUP_DOWN_I32(unsigned int, uint); +__AMDGCN_CLC_SUBGROUP_DOWN_I32(float, float); +#undef __AMDGCN_CLC_SUBGROUP_DOWN_I32 + // __spirv_SubgroupShuffleDownINTEL - unsigned int // __spirv_SubgroupShuffleDownINTEL - float -#define __AMDGCN_CLC_SUBGROUP_DOWN_I32(TYPE, CAST_TYPE, MANGLED_TYPE_NAME) \ - _CLC_DEF TYPE \ - _Z32__spirv_SubgroupShuffleDownINTELI##MANGLED_TYPE_NAME##ET_S0_S0_j( \ - TYPE current, TYPE next, unsigned int delta) { \ - return __builtin_astype(_Z32__spirv_SubgroupShuffleDownINTELIiET_S0_S0_j( \ - __clc_as_int(current), __clc_as_int(next), delta), \ - CAST_TYPE); \ +#define __AMDGCN_CLC_SUBGROUP_DOWN_I32(TYPE, CAST_TYPE, MANGLED_TYPE_NAME) \ + _CLC_DEF TYPE \ + _Z32__spirv_SubgroupShuffleDownINTELI##MANGLED_TYPE_NAME##ET_S0_S0_j( \ + TYPE current, TYPE next, unsigned int delta) { \ + return __spirv_SubgroupShuffleDownINTEL(current, next, delta); \ } __AMDGCN_CLC_SUBGROUP_DOWN_I32(unsigned int, uint, j); __AMDGCN_CLC_SUBGROUP_DOWN_I32(float, float, f); #undef __AMDGCN_CLC_SUBGROUP_DOWN_I32 // 64-bit types. 
+#define __AMDGCN_CLC_SUBGROUP_DOWN_I64(TYPE, CAST_TYPE) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_SubgroupShuffleDownINTEL( \ + TYPE current, TYPE next, unsigned int delta) { \ + int2 tmp_current = __clc_as_int2(current); \ + int2 tmp_next = __clc_as_int2(next); \ + int2 ret; \ + ret.lo = \ + __spirv_SubgroupShuffleDownINTEL(tmp_current.lo, tmp_next.lo, delta); \ + ret.hi = \ + __spirv_SubgroupShuffleDownINTEL(tmp_current.hi, tmp_next.hi, delta); \ + return __builtin_astype(ret, CAST_TYPE); \ + } +__AMDGCN_CLC_SUBGROUP_DOWN_I64(long, long); +__AMDGCN_CLC_SUBGROUP_DOWN_I64(unsigned long, ulong); +__AMDGCN_CLC_SUBGROUP_DOWN_I64(double, double); +#undef __AMDGCN_CLC_SUBGROUP_DOWN_I64 + // double __spirv_SubgroupShuffleDownINTEL(double, unsigned int, int) #define __AMDGCN_CLC_SUBGROUP_DOWN_I64(TYPE, CAST_TYPE, MANGLED_TYPE_NAME) \ _CLC_DEF TYPE \ _Z32__spirv_SubgroupShuffleDownINTELI##MANGLED_TYPE_NAME##ET_S0_S0_j( \ TYPE current, TYPE next, unsigned int delta) { \ - int2 tmp_current = __clc_as_int2(current); \ - int2 tmp_next = __clc_as_int2(next); \ - int2 ret; \ - ret.lo = _Z32__spirv_SubgroupShuffleDownINTELIiET_S0_S0_j( \ - tmp_current.lo, tmp_next.lo, delta); \ - ret.hi = _Z32__spirv_SubgroupShuffleDownINTELIiET_S0_S0_j( \ - tmp_current.hi, tmp_next.hi, delta); \ - return __builtin_astype(ret, CAST_TYPE); \ + return __spirv_SubgroupShuffleDownINTEL(current, next, delta); \ } __AMDGCN_CLC_SUBGROUP_DOWN_I64(long, long, l); __AMDGCN_CLC_SUBGROUP_DOWN_I64(unsigned long, ulong, m); @@ -541,17 +872,75 @@ __AMDGCN_CLC_SUBGROUP_DOWN_I64(double, double, d); #undef __AMDGCN_CLC_SUBGROUP_DOWN_I64 // Vector types. 
+#define __AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(TYPE, NUM_ELEMS) \ + _CLC_OVERLOAD _CLC_DEF TYPE __spirv_SubgroupShuffleDownINTEL( \ + TYPE current, TYPE next, unsigned int delta) { \ + TYPE res; \ + for (int i = 0; i < NUM_ELEMS; ++i) { \ + res[i] = __spirv_SubgroupShuffleDownINTEL(current[i], next[i], delta); \ + } \ + return res; \ + } +// [u]char +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(char2, 2) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(char4, 4) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(char8, 8) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(char16, 16) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(uchar2, 2) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(uchar4, 4) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(uchar8, 8) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(uchar16, 16) +// [u]short +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(short2, 2) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(short4, 4) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(short8, 8) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(short16, 16) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(ushort2, 2) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(ushort4, 4) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(ushort8, 8) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(ushort16, 16) +// [u]int +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(int2, 2) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(int4, 4) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(int8, 8) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(int16, 16) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(uint2, 2) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(uint4, 4) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(uint8, 8) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(uint16, 16) +// [u]long +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(long2, 2) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(long4, 4) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(long8, 8) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(long16, 16) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(ulong2, 2) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(ulong4, 4) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(ulong8, 8) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(ulong16, 16) +// half +#ifdef cl_khr_fp16 +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(half2, 2) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(half4, 4) 
+__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(half8, 8) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(half16, 16) +#endif // cl_khr_fp16 +// float +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(float2, 2) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(float4, 4) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(float8, 8) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(float16, 16) +// double +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(double2, 2) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(double4, 4) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(double8, 8) +__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(double16, 16) +#undef __AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC + #define __AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(TYPE, MANGLED_SCALAR_TY, NUM_ELEMS) \ _CLC_DEF TYPE \ _Z32__spirv_SubgroupShuffleDownINTELIDv##NUM_ELEMS##_##MANGLED_SCALAR_TY##ET_S1_S1_j( \ TYPE current, TYPE next, unsigned int delta) { \ - TYPE res; \ - for (int i = 0; i < NUM_ELEMS; ++i) { \ - res[i] = \ - _Z32__spirv_SubgroupShuffleDownINTELI##MANGLED_SCALAR_TY##ET_S0_S0_j( \ - current[i], next[i], delta); \ - } \ - return res; \ + return __spirv_SubgroupShuffleDownINTEL(current, next, delta); \ } // [u]char __AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(char2, a, 2) diff --git a/libdevice/nativecpu_utils.cpp b/libdevice/nativecpu_utils.cpp index d14aab811222e..45a6904691783 100644 --- a/libdevice/nativecpu_utils.cpp +++ b/libdevice/nativecpu_utils.cpp @@ -73,10 +73,8 @@ DefGenericCastToPtrExpl(ToGlobal, OCL_GLOBAL); __spirv_SubgroupBlockReadINTEL(const OCL_GLOBAL PType *Ptr) noexcept { \ return Ptr[__spirv_SubgroupLocalInvocationId()]; \ } \ - template <> \ - __SYCL_CONVERGENT__ DEVICE_EXTERNAL void \ - __spirv_SubgroupBlockWriteINTEL(PType OCL_GLOBAL * ptr, \ - Type v) noexcept { \ + __SYCL_CONVERGENT__ DEVICE_EXTERNAL void __spirv_SubgroupBlockWriteINTEL( \ + PType OCL_GLOBAL *ptr, Type v) noexcept { \ ((Type *)ptr)[__spirv_SubgroupLocalInvocationId()] = v; \ } \ static_assert(true) @@ -252,9 +250,8 @@ DefineBroadCast(int64_t, i64, int64_t); #define DefShuffleINTEL(Type, Sfx, MuxType) \ DEVICE_EXTERN_C MuxType 
__mux_sub_group_shuffle_##Sfx(MuxType val, \ int32_t lid) noexcept; \ - template <> \ - DEVICE_EXTERNAL Type __spirv_SubgroupShuffleINTEL( \ - Type val, unsigned id) noexcept { \ + DEVICE_EXTERNAL Type __spirv_SubgroupShuffleINTEL(Type val, \ + unsigned id) noexcept { \ return (Type)__mux_sub_group_shuffle_##Sfx((MuxType)val, id); \ } \ static_assert(true) @@ -262,8 +259,7 @@ DefineBroadCast(int64_t, i64, int64_t); #define DefShuffleUpINTEL(Type, Sfx, MuxType) \ DEVICE_EXTERN_C MuxType __mux_sub_group_shuffle_up_##Sfx( \ MuxType prev, MuxType curr, int32_t delta) noexcept; \ - template <> \ - DEVICE_EXTERNAL Type __spirv_SubgroupShuffleUpINTEL( \ + DEVICE_EXTERNAL Type __spirv_SubgroupShuffleUpINTEL( \ Type prev, Type curr, unsigned delta) noexcept { \ return (Type)__mux_sub_group_shuffle_up_##Sfx((MuxType)prev, \ (MuxType)curr, delta); \ @@ -273,8 +269,7 @@ DefineBroadCast(int64_t, i64, int64_t); #define DefShuffleDownINTEL(Type, Sfx, MuxType) \ DEVICE_EXTERN_C MuxType __mux_sub_group_shuffle_down_##Sfx( \ MuxType curr, MuxType next, int32_t delta) noexcept; \ - template <> \ - DEVICE_EXTERNAL Type __spirv_SubgroupShuffleDownINTEL( \ + DEVICE_EXTERNAL Type __spirv_SubgroupShuffleDownINTEL( \ Type curr, Type next, unsigned delta) noexcept { \ return (Type)__mux_sub_group_shuffle_down_##Sfx((MuxType)curr, \ (MuxType)next, delta); \ @@ -284,8 +279,7 @@ DefineBroadCast(int64_t, i64, int64_t); #define DefShuffleXorINTEL(Type, Sfx, MuxType) \ DEVICE_EXTERN_C MuxType __mux_sub_group_shuffle_xor_##Sfx(MuxType val, \ int32_t xor_val); \ - template <> \ - DEVICE_EXTERNAL Type __spirv_SubgroupShuffleXorINTEL( \ + DEVICE_EXTERNAL Type __spirv_SubgroupShuffleXorINTEL( \ Type data, unsigned value) noexcept { \ return (Type)__mux_sub_group_shuffle_xor_##Sfx((MuxType)data, value); \ } \ diff --git a/sycl/include/sycl/__spirv/spirv_ops.hpp b/sycl/include/sycl/__spirv/spirv_ops.hpp index 4003b02485b87..4d99badf8d475 100644 --- a/sycl/include/sycl/__spirv/spirv_ops.hpp +++ 
b/sycl/include/sycl/__spirv/spirv_ops.hpp @@ -395,101 +395,46 @@ __SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Max) #undef __SPIRV_ATOMIC_UNSIGNED #undef __SPIRV_ATOMIC_XOR -template -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT -__spirv_SubgroupShuffleINTEL(dataT Data, uint32_t InvocationId) noexcept; -template -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT -__spirv_SubgroupShuffleDownINTEL(dataT Current, dataT Next, - uint32_t Delta) noexcept; -template -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT -__spirv_SubgroupShuffleUpINTEL(dataT Previous, dataT Current, - uint32_t Delta) noexcept; -template -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT -__spirv_SubgroupShuffleXorINTEL(dataT Data, uint32_t Value) noexcept; - template __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT __spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global)) uint8_t *Ptr) noexcept; -template -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL void -__spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint8_t *Ptr, - dataT Data) noexcept; - template __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT __spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global)) uint16_t *Ptr) noexcept; -template -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL void -__spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint16_t *Ptr, - dataT Data) noexcept; - template __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT __spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global)) uint32_t *Ptr) noexcept; -template -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL void -__spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint32_t *Ptr, - dataT Data) noexcept; - template __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT __spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global)) uint64_t *Ptr) noexcept; -template -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL void 
-__spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_global)) uint64_t *Ptr, - dataT Data) noexcept; - template __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT __spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_local)) uint8_t *Ptr) noexcept; -template -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL void -__spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_local)) uint8_t *Ptr, - dataT Data) noexcept; - template __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT __spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_local)) uint16_t *Ptr) noexcept; -template -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL void -__spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_local)) uint16_t *Ptr, - dataT Data) noexcept; - template __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT __spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_local)) uint32_t *Ptr) noexcept; -template -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL void -__spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_local)) uint32_t *Ptr, - dataT Data) noexcept; - template __SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT __spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_local)) uint64_t *Ptr) noexcept; -template -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL void -__spirv_SubgroupBlockWriteINTEL(__attribute__((opencl_local)) uint64_t *Ptr, - dataT Data) noexcept; - template extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int __spirv_FixedSqrtINTEL(sycl::detail::ap_int a, bool S, int32_t I, int32_t rI, diff --git a/sycl/test/check_device_code/group_load_store_alignment.cpp b/sycl/test/check_device_code/group_load_store_alignment.cpp index b651fb3b8f802..1de28486460d4 100644 --- a/sycl/test/check_device_code/group_load_store_alignment.cpp +++ b/sycl/test/check_device_code/group_load_store_alignment.cpp @@ -82,7 +82,7 @@ SYCL_EXTERNAL void test_load_with_alignment_hint(sycl::sub_group &sg, // CHECK-GLOBAL-NEXT: tail call spir_func void 
@_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESP_RKSN_SO_SQ__EXIT:%.*]] // CHECK-GLOBAL: if.end.i.i: -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIjEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[P]], i32 noundef [[V]]) #[[ATTR3]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1jj(ptr addrspace(1) noundef nonnull [[P]], i32 noundef [[V]]) #[[ATTR3]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESP_RKSN_SO_SQ__EXIT]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT2_EEvE4typeESP_RKSN_SO_SQ_.exit: // CHECK-GLOBAL-NEXT: ret void @@ -96,7 +96,7 @@ SYCL_EXTERNAL void test_store_without_alignment_hint(sycl::sub_group &sg, int v, // CHECK-GLOBAL-NEXT: entry: // CHECK-GLOBAL-NEXT: 
[[CMP_I_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null // CHECK-GLOBAL-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I_I]]) -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIjEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[P]], i32 noundef [[V:%.*]]) #[[ATTR3]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1jj(ptr addrspace(1) noundef nonnull [[P]], i32 noundef [[V:%.*]]) #[[ATTR3]] // CHECK-GLOBAL-NEXT: ret void // SYCL_EXTERNAL void test_store_with_alignment_hint(sycl::sub_group &sg, int v, diff --git a/sycl/test/check_device_code/group_load_store_native_key.cpp b/sycl/test/check_device_code/group_load_store_native_key.cpp index f48601ab8c215..a30b89616cd1a 100644 --- a/sycl/test/check_device_code/group_load_store_native_key.cpp +++ b/sycl/test/check_device_code/group_load_store_native_key.cpp @@ -109,7 +109,7 @@ SYCL_EXTERNAL void test_load_native(sycl::sub_group &sg, plain_ptr p, // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESP_RKSN_SO_SQ__EXIT:%.*]] // CHECK-GLOBAL: if.end.i.i: -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIjEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[P]], i32 noundef [[V]]) #[[ATTR3]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1jj(ptr addrspace(1) noundef nonnull [[P]], i32 noundef [[V]]) #[[ATTR3]] // CHECK-GLOBAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESP_RKSN_SO_SQ__EXIT]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT2_EEvE4typeESP_RKSN_SO_SQ_.exit: // CHECK-GLOBAL-NEXT: ret void @@ -147,7 +147,7 @@ SYCL_EXTERNAL void test_store(sycl::sub_group &sg, int v, plain_ptr p) { // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_RKSP_SQ_SS__EXIT:%.*]] // CHECK-GLOBAL: if.end.i.i: -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIjEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[P]], i32 noundef [[V]]) #[[ATTR3]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1jj(ptr addrspace(1) noundef nonnull [[P]], i32 noundef [[V]]) #[[ATTR3]] // 
CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_RKSP_SQ_SS__EXIT]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT2_EEvE4typeESR_RKSP_SQ_SS_.exit: // CHECK-GLOBAL-NEXT: ret void @@ -169,7 +169,7 @@ SYCL_EXTERNAL void test_store(sycl::sub_group &sg, int v, plain_ptr p) { // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR3]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_RKSP_SQ_SS__EXIT:%.*]] // CHECK-LOCAL: if.end.i.i: -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIjEvPU3AS3jT_(ptr addrspace(3) noundef nonnull [[P]], i32 noundef [[V]]) #[[ATTR3]] +// CHECK-LOCAL-NEXT: tail call spir_func void 
@_Z31__spirv_SubgroupBlockWriteINTELPU3AS3jj(ptr addrspace(3) noundef nonnull [[P]], i32 noundef [[V]]) #[[ATTR3]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_RKSP_SQ_SS__EXIT]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT2_EEvE4typeESR_RKSP_SQ_SS_.exit: // CHECK-LOCAL-NEXT: ret void diff --git a/sycl/test/check_device_code/group_shuffle.cpp b/sycl/test/check_device_code/group_shuffle.cpp index 9bd01bf1e2b9f..fbd875b7a0694 100644 --- a/sycl/test/check_device_code/group_shuffle.cpp +++ b/sycl/test/check_device_code/group_shuffle.cpp @@ -27,7 +27,7 @@ using namespace sycl::ext::oneapi; // CHECK-NEXT: [[CONV_I_I_I:%.*]] = zext nneg i32 [[S_0_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[AGG_TMP14_I]], i64 0, i64 [[CONV_I_I_I]] // CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX_I_I_I_I_I]], align 2, !tbaa [[TBAA14:![0-9]+]], !noalias [[META18:![0-9]+]] -// CHECK-NEXT: [[CALL4_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @_Z28__spirv_SubgroupShuffleINTELItET_S0_j(i16 noundef zeroext [[TMP1]], i32 noundef 1) #[[ATTR6:[0-9]+]], !noalias 
[[META19:![0-9]+]] +// CHECK-NEXT: [[CALL4_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @_Z28__spirv_SubgroupShuffleINTELtj(i16 noundef zeroext [[TMP1]], i32 noundef 1) #[[ATTR6:[0-9]+]], !noalias [[META19:![0-9]+]] // CHECK-NEXT: [[ARRAYIDX_I_I_I12_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[REF_TMP]], i64 0, i64 [[CONV_I_I_I]] // CHECK-NEXT: store i16 [[CALL4_I_I_I_I]], ptr [[ARRAYIDX_I_I_I12_I_I]], align 2, !tbaa [[TBAA14]], !alias.scope [[META18]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i32 [[S_0_I_I]], 1 @@ -72,7 +72,7 @@ SYCL_EXTERNAL void test_shuffle1(sycl::sub_group &sg, vec *buf, // CHECK-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[S_0_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[AGG_TMP14_I]], i64 0, i64 [[CONV_I_I]] // CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA14]], !noalias [[META32]] -// CHECK-NEXT: [[CALL4_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @_Z28__spirv_SubgroupShuffleINTELItET_S0_j(i16 noundef zeroext [[TMP1]], i32 noundef 1) #[[ATTR6]], !noalias [[META33:![0-9]+]] +// CHECK-NEXT: [[CALL4_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @_Z28__spirv_SubgroupShuffleINTELtj(i16 noundef zeroext [[TMP1]], i32 noundef 1) #[[ATTR6]], !noalias [[META33:![0-9]+]] // CHECK-NEXT: [[ARRAYIDX_I13_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[REF_TMP]], i64 0, i64 [[CONV_I_I]] // CHECK-NEXT: store i16 [[CALL4_I_I_I_I]], ptr [[ARRAYIDX_I13_I_I]], align 2, !tbaa [[TBAA14]], !alias.scope [[META32]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i32 [[S_0_I_I]], 1 @@ -116,7 +116,7 @@ SYCL_EXTERNAL void test_shuffle2(sycl::sub_group &sg, marray *buf, // CHECK-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[S_0_I_I]] to i64 // CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds [5 x 
%"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[AGG_TMP14_I]], i64 0, i64 [[CONV_I_I]] // CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA14]], !noalias [[META44]] -// CHECK-NEXT: [[CALL4_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @_Z28__spirv_SubgroupShuffleINTELItET_S0_j(i16 noundef zeroext [[TMP0]], i32 noundef 1) #[[ATTR6]], !noalias [[META45:![0-9]+]] +// CHECK-NEXT: [[CALL4_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @_Z28__spirv_SubgroupShuffleINTELtj(i16 noundef zeroext [[TMP0]], i32 noundef 1) #[[ATTR6]], !noalias [[META45:![0-9]+]] // CHECK-NEXT: [[ARRAYIDX_I13_I_I:%.*]] = getelementptr inbounds [5 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[REF_TMP]], i64 0, i64 [[CONV_I_I]] // CHECK-NEXT: store i16 [[CALL4_I_I_I_I]], ptr [[ARRAYIDX_I13_I_I]], align 2, !tbaa [[TBAA14]], !alias.scope [[META44]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i32 [[S_0_I_I]], 1 @@ -138,7 +138,7 @@ SYCL_EXTERNAL void test_shuffle3(sycl::sub_group &sg, marray *buf, // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw ptr addrspace(4), ptr addrspace(4) [[BUF:%.*]], i64 [[ID:%.*]] // CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX]], align 8, !tbaa [[TBAA51:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(4) [[TMP0]] to i64 -// CHECK-NEXT: [[CALL4_I_I_I:%.*]] = tail call spir_func noundef i64 @_Z28__spirv_SubgroupShuffleINTELImET_S0_j(i64 noundef [[TMP1]], i32 noundef 1) #[[ATTR6]] +// CHECK-NEXT: [[CALL4_I_I_I:%.*]] = tail call spir_func noundef i64 @_Z28__spirv_SubgroupShuffleINTELmj(i64 noundef [[TMP1]], i32 noundef 1) #[[ATTR6]] // CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[CALL4_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: store ptr addrspace(4) [[TMP2]], ptr addrspace(4) [[ARRAYIDX]], align 8, !tbaa [[TBAA51]] // CHECK-NEXT: ret void @@ -153,7 +153,7 @@ SYCL_EXTERNAL void test_shuffle4(sycl::sub_group &sg, int **buf, size_t id) { // CHECK-NEXT: [[ARRAYIDX:%.*]] 
= getelementptr inbounds nuw ptr addrspace(4), ptr addrspace(4) [[BUF:%.*]], i64 [[ID:%.*]] // CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX]], align 8, !tbaa [[TBAA51]] // CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(4) [[TMP0]] to i64 -// CHECK-NEXT: [[CALL4_I_I_I:%.*]] = tail call spir_func noundef i64 @_Z28__spirv_SubgroupShuffleINTELImET_S0_j(i64 noundef [[TMP1]], i32 noundef 1) #[[ATTR6]] +// CHECK-NEXT: [[CALL4_I_I_I:%.*]] = tail call spir_func noundef i64 @_Z28__spirv_SubgroupShuffleINTELmj(i64 noundef [[TMP1]], i32 noundef 1) #[[ATTR6]] // CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[CALL4_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: store ptr addrspace(4) [[TMP2]], ptr addrspace(4) [[ARRAYIDX]], align 8, !tbaa [[TBAA51]] // CHECK-NEXT: ret void diff --git a/sycl/test/check_device_code/group_store.cpp b/sycl/test/check_device_code/group_store.cpp index 5a358585e24c0..8a825222339ca 100644 --- a/sycl/test/check_device_code/group_store.cpp +++ b/sycl/test/check_device_code/group_store.cpp @@ -92,7 +92,7 @@ SYCL_EXTERNAL void test_naive(sycl::sub_group &sg, int v, plain_ptr p) { // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_RKSP_SQ_SS__EXIT:%.*]] // CHECK-GLOBAL: if.end.i.i: -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIjEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[P]], i32 noundef [[V]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: tail 
call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1jj(ptr addrspace(1) noundef nonnull [[P]], i32 noundef [[V]]) #[[ATTR7]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_RKSP_SQ_SS__EXIT]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT2_EEvE4typeESR_RKSP_SQ_SS_.exit: // CHECK-GLOBAL-NEXT: ret void @@ -114,7 +114,7 @@ SYCL_EXTERNAL void test_naive(sycl::sub_group &sg, int v, plain_ptr p) { // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_RKSP_SQ_SS__EXIT:%.*]] // CHECK-LOCAL: if.end.i.i: -// CHECK-LOCAL-NEXT: tail call spir_func void 
@_Z31__spirv_SubgroupBlockWriteINTELIjEvPU3AS3jT_(ptr addrspace(3) noundef nonnull [[P]], i32 noundef [[V]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3jj(ptr addrspace(3) noundef nonnull [[P]], i32 noundef [[V]]) #[[ATTR7]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESR_RKSP_SQ_SS__EXIT]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT2_EEvE4typeESR_RKSP_SQ_SS_.exit: // CHECK-LOCAL-NEXT: ret void @@ -142,7 +142,7 @@ SYCL_EXTERNAL void test_optimized(sycl::sub_group &sg, int v, // CHECK-GLOBAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESP_RKSN_SO_SQ__EXIT:%.*]] // CHECK-GLOBAL: 
if.end.i.i: -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIjEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[P]], i32 noundef [[V]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1jj(ptr addrspace(1) noundef nonnull [[P]], i32 noundef [[V]]) #[[ATTR7]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESP_RKSN_SO_SQ__EXIT]] // CHECK-GLOBAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS1iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT2_EEvE4typeESP_RKSN_SO_SQ_.exit: // CHECK-GLOBAL-NEXT: ret void @@ -164,7 +164,7 @@ SYCL_EXTERNAL void test_optimized(sycl::sub_group &sg, int v, // CHECK-LOCAL-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrieriii(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR7]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESP_RKSN_SO_SQ__EXIT:%.*]] // CHECK-LOCAL: if.end.i.i: -// 
CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIjEvPU3AS3jT_(ptr addrspace(3) noundef nonnull [[P]], i32 noundef [[V]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3jj(ptr addrspace(3) noundef nonnull [[P]], i32 noundef [[V]]) #[[ATTR7]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEIPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T1_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT2_EEVE4TYPEESP_RKSN_SO_SQ__EXIT]] // CHECK-LOCAL: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEiPU3AS3iNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi0EEEEENSB_INS3_14full_group_keyEJEEENSB_INS9_25native_local_block_io_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T1_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT2_EEvE4typeESP_RKSN_SO_SQ_.exit: // CHECK-LOCAL-NEXT: ret void @@ -306,7 +306,7 @@ SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] // CHECK-GLOBAL: for.cond.cleanup.i.i: // CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i64, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34:![0-9]+]] -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELImEvPU3AS1mT_(ptr addrspace(1) noundef nonnull [[P]], i64 noundef [[TMP5]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1mm(ptr addrspace(1) noundef nonnull [[P]], i64 noundef [[TMP5]]) #[[ATTR7]] // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr 
nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] // CHECK-GLOBAL: for.body.i.i: @@ -362,7 +362,7 @@ SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, // CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] // CHECK-LOCAL: for.cond.cleanup.i.i: // CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i64, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34:![0-9]+]] -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELImEvPU3AS3mT_(ptr addrspace(3) noundef nonnull [[P]], i64 noundef [[TMP5]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3mm(ptr addrspace(3) noundef nonnull [[P]], i64 noundef [[TMP5]]) #[[ATTR7]] // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] // CHECK-LOCAL: for.body.i.i: @@ -425,7 +425,7 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, span v, // 
CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] // CHECK-GLOBAL: for.cond.cleanup.i.i: // CHECK-GLOBAL-NEXT: [[TMP5:%.*]] = load i64, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34]] -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELImEvPU3AS1mT_(ptr addrspace(1) noundef nonnull [[P]], i64 noundef [[TMP5]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1mm(ptr addrspace(1) noundef nonnull [[P]], i64 noundef [[TMP5]]) #[[ATTR7]] // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKSLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INSA_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESS_NS0_4SPANISQ_XT1_EEESR_ST__EXIT]] // CHECK-GLOBAL: for.body.i.i: @@ -481,7 +481,7 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, span v, // CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] // CHECK-LOCAL: for.cond.cleanup.i.i: // CHECK-LOCAL-NEXT: [[TMP5:%.*]] = load i64, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34]] -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELImEvPU3AS3mT_(ptr addrspace(3) noundef nonnull [[P]], i64 noundef [[TMP5]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3mm(ptr addrspace(3) noundef nonnull [[P]], i64 noundef [[TMP5]]) #[[ATTR7]] // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-LOCAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEKSLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI0EEEEENSC_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSC_INS3_14FULL_GROUP_KEYEJEEENSC_INSA_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESS_NS0_4SPANISQ_XT1_EEESR_ST__EXIT]] // CHECK-LOCAL: for.body.i.i: @@ -782,7 +782,7 @@ SYCL_EXTERNAL void test_naive(sycl::sub_group &sg, span v, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] // CHECK-GLOBAL: for.cond.cleanup.i.i: // CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr [[VALUES_I_I]], align 4, !tbaa [[TBAA34]] -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv2_jEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[P]], <2 x i32> noundef [[TMP6]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1jDv2_j(ptr addrspace(1) noundef nonnull [[P]], <2 x i32> noundef [[TMP6]]) #[[ATTR7]] // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] // CHECK-GLOBAL: for.body.i.i: @@ -839,7 +839,7 @@ SYCL_EXTERNAL void test_naive(sycl::sub_group &sg, span v, // CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label 
[[FOR_COND_CLEANUP_I_I:%.*]] // CHECK-LOCAL: for.cond.cleanup.i.i: // CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr [[VALUES_I_I]], align 4, !tbaa [[TBAA34]] -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv2_jEvPU3AS3jT_(ptr addrspace(3) noundef nonnull [[P]], <2 x i32> noundef [[TMP6]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3jDv2_j(ptr addrspace(3) noundef nonnull [[P]], <2 x i32> noundef [[TMP6]]) #[[ATTR7]] // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] // CHECK-LOCAL: for.body.i.i: @@ -902,7 +902,7 @@ SYCL_EXTERNAL void test_optimized(sycl::sub_group &sg, span v, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] // CHECK-GLOBAL: for.cond.cleanup.i.i: // CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr [[VALUES_I_I]], align 4, !tbaa [[TBAA34]] -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv2_jEvPU3AS1jT_(ptr addrspace(1) noundef nonnull [[P]], <2 x i32> noundef [[TMP6]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1jDv2_j(ptr addrspace(1) noundef nonnull [[P]], <2 x i32> noundef [[TMP6]]) #[[ATTR7]] // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-GLOBAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS1INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_SQ__EXIT]] // CHECK-GLOBAL: for.body.i.i: @@ -959,7 +959,7 @@ SYCL_EXTERNAL void test_optimized(sycl::sub_group &sg, span v, // CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] // CHECK-LOCAL: for.cond.cleanup.i.i: // CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr [[VALUES_I_I]], align 4, !tbaa [[TBAA34]] -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv2_jEvPU3AS3jT_(ptr addrspace(3) noundef nonnull [[P]], <2 x i32> noundef [[TMP6]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3jDv2_j(ptr addrspace(3) noundef nonnull [[P]], <2 x i32> noundef [[TMP6]]) #[[ATTR7]] // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPEILM2EPU3AS3INS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_SQ__EXIT]] // CHECK-LOCAL: for.body.i.i: @@ -1142,7 +1142,7 @@ SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] // CHECK-GLOBAL: for.cond.cleanup.i.i: 
// CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34]] -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv4_tEvPU3AS1tT_(ptr addrspace(1) noundef nonnull [[P]], <4 x i16> noundef [[TMP6]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1tDv4_t(ptr addrspace(1) noundef nonnull [[P]], <4 x i16> noundef [[TMP6]]) #[[ATTR7]] // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] // CHECK-GLOBAL: for.body.i.i: @@ -1199,7 +1199,7 @@ SYCL_EXTERNAL void test_accessor_iter_force_optimized(sycl::sub_group &sg, // CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] // CHECK-LOCAL: for.cond.cleanup.i.i: // CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34]] -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv4_tEvPU3AS3tT_(ptr addrspace(3) noundef nonnull [[P]], <4 x i16> noundef [[TMP6]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3tDv4_t(ptr addrspace(3) noundef nonnull [[P]], <4 x i16> noundef [[TMP6]]) #[[ATTR7]] // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-LOCAL-NEXT: br label 
[[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM4EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] // CHECK-LOCAL: for.body.i.i: @@ -1263,7 +1263,7 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, span v, // CHECK-GLOBAL-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] // CHECK-GLOBAL: for.cond.cleanup.i.i: // CHECK-GLOBAL-NEXT: [[TMP6:%.*]] = load <16 x i16>, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34]] -// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv16_tEvPU3AS1tT_(ptr addrspace(1) noundef nonnull [[P]], <16 x i16> noundef [[TMP6]]) #[[ATTR7]] +// CHECK-GLOBAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS1tDv16_t(ptr addrspace(1) noundef nonnull [[P]], <16 x i16> noundef [[TMP6]]) #[[ATTR7]] // CHECK-GLOBAL-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-GLOBAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM16EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] // CHECK-GLOBAL: for.body.i.i: @@ -1320,7 +1320,7 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, span v, // CHECK-LOCAL-NEXT: br i1 [[CMP_I_I]], label 
[[FOR_BODY_I_I]], label [[FOR_COND_CLEANUP_I_I:%.*]] // CHECK-LOCAL: for.cond.cleanup.i.i: // CHECK-LOCAL-NEXT: [[TMP6:%.*]] = load <16 x i16>, ptr [[VALUES_I_I]], align 2, !tbaa [[TBAA34]] -// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv16_tEvPU3AS3tT_(ptr addrspace(3) noundef nonnull [[P]], <16 x i16> noundef [[TMP6]]) #[[ATTR7]] +// CHECK-LOCAL-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELPU3AS3tDv16_t(ptr addrspace(3) noundef nonnull [[P]], <16 x i16> noundef [[TMP6]]) #[[ATTR7]] // CHECK-LOCAL-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VALUES_I_I]]) #[[ATTR9]] // CHECK-LOCAL-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM16EPU3AS3SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_25NATIVE_LOCAL_BLOCK_IO_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT]] // CHECK-LOCAL: for.body.i.i: