diff --git a/sycl/include/sycl/__spirv/spirv_ops.hpp b/sycl/include/sycl/__spirv/spirv_ops.hpp index b95cfa7c1d7f3..9a7788d5afee7 100644 --- a/sycl/include/sycl/__spirv/spirv_ops.hpp +++ b/sycl/include/sycl/__spirv/spirv_ops.hpp @@ -15,6 +15,7 @@ #include // for size_t #include // for uint32_t #include +#include // for pair // Convergent attribute #ifdef __SYCL_DEVICE_ONLY__ @@ -1124,6 +1125,13 @@ extern __DPCPP_SYCL_EXTERNAL std::enable_if_t && std::is_unsigned_v, to> __spirv_ConvertPtrToU(from val) noexcept; +template +extern __DPCPP_SYCL_EXTERNAL std::pair<__ocl_vec_t, __ocl_vec_t> +__spirv_IAddCarry(__ocl_vec_t src0, __ocl_vec_t src1); + +template +extern __DPCPP_SYCL_EXTERNAL std::pair<__ocl_vec_t, __ocl_vec_t> +__spirv_ISubBorrow(__ocl_vec_t src0, __ocl_vec_t src1); template extern __DPCPP_SYCL_EXTERNAL __spv::__spirv_TaskSequenceINTEL * __spirv_TaskSequenceCreateINTEL(RetT (*f)(ArgsT...), int Pipelined = -1, diff --git a/sycl/include/sycl/ext/intel/esimd/math.hpp b/sycl/include/sycl/ext/intel/esimd/math.hpp index 077db690ba70f..069432c8b8ee8 100644 --- a/sycl/include/sycl/ext/intel/esimd/math.hpp +++ b/sycl/include/sycl/ext/intel/esimd/math.hpp @@ -1720,131 +1720,95 @@ bfn(T src0, T src1, T src2) { /// @} sycl_esimd_logical -/// Performs add with carry of 2 unsigned 32-bit vectors. -/// @tparam N size of the vectors -/// @param carry vector that is going to hold resulting carry flag -/// @param src0 first term -/// @param src1 second term -/// @return sum of 2 terms, carry flag is returned through \c carry parameter -template -__ESIMD_API __ESIMD_NS::simd -addc(__ESIMD_NS::simd &carry, __ESIMD_NS::simd src0, - __ESIMD_NS::simd src1) { - std::pair<__ESIMD_DNS::vector_type_t, - __ESIMD_DNS::vector_type_t> - Result = __esimd_addc(src0.data(), src1.data()); - - carry = Result.first; - return Result.second; -} - -/// Performs add with carry of a unsigned 32-bit vector and scalar. -/// @tparam N size of the vectors -/// @param carry vector that is going to hold resulting carry flag -/// @param src0 first term -/// @param src1 second term -/// @return sum of 2 terms, carry flag is returned through \c carry parameter -template -__ESIMD_API __ESIMD_NS::simd -addc(__ESIMD_NS::simd &carry, __ESIMD_NS::simd src0, - uint32_t src1) { - __ESIMD_NS::simd Src1V = src1; - return addc(carry, src0, Src1V); -} - -/// Performs add with carry of a unsigned 32-bit scalar and vector. -/// @tparam N size of the vectors -/// @param carry vector that is going to hold resulting carry flag -/// @param src0 first term -/// @param src1 second term -/// @return sum of 2 terms, carry flag is returned through \c carry parameter -template -__ESIMD_API __ESIMD_NS::simd -addc(__ESIMD_NS::simd &carry, uint32_t src0, - __ESIMD_NS::simd src1) { - __ESIMD_NS::simd Src0V = src0; - return addc(carry, Src0V, src1); -} - -/// Performs add with carry of a unsigned 32-bit scalars. -/// @tparam N size of the vectors -/// @param carry scalar that is going to hold resulting carry flag -/// @param src0 first term -/// @param src1 second term -/// @return sum of 2 terms, carry flag is returned through \c carry parameter -__ESIMD_API uint32_t addc(uint32_t &carry, uint32_t src0, uint32_t src1) { - __ESIMD_NS::simd CarryV = carry; - __ESIMD_NS::simd Src0V = src0; - __ESIMD_NS::simd Src1V = src1; - __ESIMD_NS::simd Res = addc(CarryV, Src0V, Src1V); - carry = CarryV[0]; - return Res[0]; -} - -/// Performs substraction with borrow of 2 unsigned 32-bit vectors. -/// @tparam N size of the vectors -/// @param borrow vector that is going to hold resulting borrow flag -/// @param src0 first term -/// @param src1 second term -/// @return difference of 2 terms, borrow flag is returned through \c borrow -/// parameter -template -__ESIMD_API __ESIMD_NS::simd -subb(__ESIMD_NS::simd &borrow, __ESIMD_NS::simd src0, - __ESIMD_NS::simd src1) { - std::pair<__ESIMD_DNS::vector_type_t, - __ESIMD_DNS::vector_type_t> - Result = __esimd_subb(src0.data(), src1.data()); - - borrow = Result.first; - return Result.second; -} - -/// Performs substraction with borrow of unsigned 32-bit vector and scalar. -/// @tparam N size of the vectors -/// @param borrow vector that is going to hold resulting borrow flag -/// @param src0 first term -/// @param src1 second term -/// @return difference of 2 terms, borrow flag is returned through \c borrow -/// parameter -template -__ESIMD_API __ESIMD_NS::simd -subb(__ESIMD_NS::simd &borrow, __ESIMD_NS::simd src0, - uint32_t src1) { - __ESIMD_NS::simd Src1V = src1; - return subb(borrow, src0, Src1V); -} - -/// Performs substraction with borrow of unsigned 32-bit scalar and vector. -/// @tparam N size of the vectors -/// @param borrow vector that is going to hold resulting borrow flag -/// @param src0 first term -/// @param src1 second term -/// @return difference of 2 terms, borrow flag is returned through \c borrow -/// parameter -template -__ESIMD_API __ESIMD_NS::simd -subb(__ESIMD_NS::simd &borrow, uint32_t src0, - __ESIMD_NS::simd src1) { - __ESIMD_NS::simd Src0V = src0; - return subb(borrow, Src0V, src1); -} - -/// Performs substraction with borrow of 2 unsigned 32-bit scalars. -/// @tparam N size of the vectors -/// @param borrow scalar that is going to hold resulting borrow flag -/// @param src0 first term -/// @param src1 second term -/// @return difference of 2 terms, borrow flag is returned through \c borrow -/// parameter -__ESIMD_API uint32_t subb(uint32_t &borrow, uint32_t src0, uint32_t src1) { - __ESIMD_NS::simd BorrowV = borrow; - __ESIMD_NS::simd Src0V = src0; - __ESIMD_NS::simd Src1V = src1; - __ESIMD_NS::simd Res = subb(BorrowV, Src0V, Src1V); - borrow = BorrowV[0]; - return Res[0]; -} +#if defined(__SYCL_DEVICE_ONLY__) +#define __ESIMD_ADDC_IMPL(T) \ + std::pair<__ESIMD_DNS::vector_type_t, \ + __ESIMD_DNS::vector_type_t> \ + Result = __spirv_IAddCarry(src0.data(), src1.data()); \ + carry = Result.second; \ + return Result.first; +#else +#define __ESIMD_ADDC_IMPL(T) return 0; +#endif // __SYCL_DEVICE_ONLY__ + +#define __ESIMD_ADDC(T) \ + template \ + __ESIMD_API __ESIMD_NS::simd addc(__ESIMD_NS::simd &carry, \ + __ESIMD_NS::simd src0, \ + __ESIMD_NS::simd src1) { \ + __ESIMD_ADDC_IMPL(T) \ + } \ + template \ + __ESIMD_API __ESIMD_NS::simd addc( \ + __ESIMD_NS::simd &carry, __ESIMD_NS::simd src0, T src1) { \ + __ESIMD_NS::simd Src1V = src1; \ + return addc(carry, src0, Src1V); \ + } \ + template \ + __ESIMD_API __ESIMD_NS::simd addc( \ + __ESIMD_NS::simd &carry, T src0, __ESIMD_NS::simd src1) { \ + __ESIMD_NS::simd Src0V = src0; \ + return addc(carry, Src0V, src1); \ + } \ + __ESIMD_API T addc(T &carry, T src0, T src1) { \ + __ESIMD_NS::simd CarryV = carry; \ + __ESIMD_NS::simd Src0V = src0; \ + __ESIMD_NS::simd Src1V = src1; \ + __ESIMD_NS::simd Res = addc(CarryV, Src0V, Src1V); \ + carry = CarryV[0]; \ + return Res[0]; \ + } + +__ESIMD_ADDC(uint32_t) +__ESIMD_ADDC(uint64_t) + +#undef __ESIMD_ADDC +#undef __ESIMD_ADDC_IMPL + +#if defined(__SYCL_DEVICE_ONLY__) +#define __ESIMD_SUBB_IMPL(T) \ + std::pair<__ESIMD_DNS::vector_type_t, \ + __ESIMD_DNS::vector_type_t> \ + Result = __spirv_ISubBorrow(src0.data(), src1.data()); \ + borrow = Result.second; \ + return Result.first; +#else +#define __ESIMD_SUBB_IMPL(T) return 0; +#endif // __SYCL_DEVICE_ONLY__ + +#define __ESIMD_SUBB(T) \ + template \ + __ESIMD_API __ESIMD_NS::simd subb(__ESIMD_NS::simd &borrow, \ + __ESIMD_NS::simd src0, \ + __ESIMD_NS::simd src1) { \ + __ESIMD_SUBB_IMPL(T) \ + } \ + template \ + __ESIMD_API __ESIMD_NS::simd subb( \ + __ESIMD_NS::simd &borrow, __ESIMD_NS::simd src0, T src1) { \ + __ESIMD_NS::simd Src1V = src1; \ + return subb(borrow, src0, Src1V); \ + } \ + template \ + __ESIMD_API __ESIMD_NS::simd subb( \ + __ESIMD_NS::simd &borrow, T src0, __ESIMD_NS::simd src1) { \ + __ESIMD_NS::simd Src0V = src0; \ + return subb(borrow, Src0V, src1); \ + } \ + __ESIMD_API T subb(T &borrow, T src0, T src1) { \ + __ESIMD_NS::simd BorrowV = borrow; \ + __ESIMD_NS::simd Src0V = src0; \ + __ESIMD_NS::simd Src1V = src1; \ + __ESIMD_NS::simd Res = subb(BorrowV, Src0V, Src1V); \ + borrow = BorrowV[0]; \ + return Res[0]; \ + } // namespace ext::intel::esimd + +__ESIMD_SUBB(uint32_t) +__ESIMD_SUBB(uint64_t) + +#undef __ESIMD_SUBB +#undef __ESIMD_SUBB_IMPL /// rdtsc - get the value of timestamp counter. /// @return the current value of timestamp counter diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp index c156f7381cabb..5bbfd20ac3fa9 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp @@ -94,18 +94,6 @@ __ESIMD_INTRIN __ESIMD_DNS::vector_type_t __esimd_dpasw_nosrc0( __ESIMD_DNS::vector_type_t src1, __ESIMD_DNS::vector_type_t src2) __ESIMD_INTRIN_END; -template -__ESIMD_INTRIN std::pair<__ESIMD_DNS::vector_type_t, - __ESIMD_DNS::vector_type_t> -__esimd_addc(__ESIMD_DNS::vector_type_t src0, - __ESIMD_DNS::vector_type_t src1) __ESIMD_INTRIN_END; - -template -__ESIMD_INTRIN std::pair<__ESIMD_DNS::vector_type_t, - __ESIMD_DNS::vector_type_t> -__esimd_subb(__ESIMD_DNS::vector_type_t src0, - __ESIMD_DNS::vector_type_t src1) __ESIMD_INTRIN_END; - template __ESIMD_INTRIN __ESIMD_raw_vec_t(T, N) __esimd_bfn(__ESIMD_raw_vec_t(T, N) src0, __ESIMD_raw_vec_t(T, N) src1, diff --git a/sycl/test-e2e/ESIMD/addc.cpp b/sycl/test-e2e/ESIMD/addc.cpp index 956b860e97b53..efd97f5858eac 100644 --- a/sycl/test-e2e/ESIMD/addc.cpp +++ b/sycl/test-e2e/ESIMD/addc.cpp @@ -19,67 +19,98 @@ using namespace sycl; using namespace sycl::ext::intel::esimd; -template bool test(sycl::queue Q) { +template +bool test(sycl::queue Q) { static_assert(AIsVector || BIsVector || N == 1, "(Scalar + Scalar) case must have N==1"); + uint32_t ValuesToTryHost32[] = {0, + 1, + static_cast(-1), + 0x7f, + static_cast(-0x7f), + 0x7fff, + static_cast(-0x7fff), + 0x7ffff, + static_cast(-0x7ffff), + 0x7ffffff, + 0x80, + static_cast(-0x80), + 0x8000, + static_cast(-0x8000), + 0x800000, + static_cast(-0x800000), + 0x80000000}; + + uint64_t ValuesToTryHost64[] = {0, + 1, + static_cast(-1), + 0x7f, + static_cast(-0x7f), + 0x7fff, + static_cast(-0x7fff), + 0x7ffff, + static_cast(-0x7ffff), + 0x7ffffff, + static_cast(-0x7ffffff), + 0x7ffffffff, + static_cast(-0x7ffffffff), + 0x80, + static_cast(-0x80), + 0x8000, + static_cast(-0x8000), + 0x800000, + static_cast(-0x800000), + 0x80000000, + static_cast(-0x80000000), + 0x8000000000, + static_cast(-0x8000000000)}; + + uint32_t ValuesToTrySize = 0; + if constexpr (sizeof(T) == 4) { + ValuesToTrySize = sizeof(ValuesToTryHost32) / sizeof(T); + } else if constexpr (sizeof(T) == 8) { + ValuesToTrySize = sizeof(ValuesToTryHost64) / sizeof(T); + } + + std::cout << "Running case: T=" << esimd_test::type_name() << " N = " << N + << ", AIsVector = " << AIsVector << ", BIsVector=" << BIsVector + << std::endl; + + auto ValuesToTryUPtr = esimd_test::usm_malloc_shared(Q, ValuesToTrySize); + T *ValuesToTryPtr = ValuesToTryUPtr.get(); + if constexpr (sizeof(T) == 4) { + memcpy(ValuesToTryPtr, ValuesToTryHost32, ValuesToTrySize * sizeof(T)); + } else if constexpr (sizeof(T) == 8) { + memcpy(ValuesToTryPtr, ValuesToTryHost64, ValuesToTrySize * sizeof(T)); + } - uint32_t ValuesToTryHost[] = {0, - 1, - static_cast(-1), - 0x7f, - static_cast(-0x7f), - 0x7fff, - static_cast(-0x7fff), - 0x7ffff, - static_cast(-0x7ffff), - 0x7ffffff, - 0x80, - static_cast(-0x80), - 0x8000, - static_cast(-0x8000), - 0x800000, - static_cast(-0x800000), - 0x80000000}; - uint32_t ValuesToTrySize = sizeof(ValuesToTryHost) / sizeof(uint32_t); - - std::cout << "Running case: N=" << N << ", AIsVector=" << AIsVector - << ", BIsVector=" << BIsVector << std::endl; - - auto ValuesToTryUPtr = - esimd_test::usm_malloc_shared(Q, ValuesToTrySize); - uint32_t *ValuesToTryPtr = ValuesToTryUPtr.get(); - memcpy(ValuesToTryPtr, ValuesToTryHost, ValuesToTrySize * sizeof(uint32_t)); - - auto ResultsMatrixUPtr = esimd_test::usm_malloc_shared( + auto ResultsMatrixUPtr = esimd_test::usm_malloc_shared( Q, ValuesToTrySize * ValuesToTrySize * N); - auto CarryMatrixUPtr = esimd_test::usm_malloc_shared( + auto CarryMatrixUPtr = esimd_test::usm_malloc_shared( Q, ValuesToTrySize * ValuesToTrySize * N); - uint32_t *ResultsMatrixPtr = ResultsMatrixUPtr.get(); - uint32_t *CarryMatrixPtr = CarryMatrixUPtr.get(); + T *ResultsMatrixPtr = ResultsMatrixUPtr.get(); + T *CarryMatrixPtr = CarryMatrixUPtr.get(); try { Q.single_task([=]() SYCL_ESIMD_KERNEL { - simd VecInc(0, 1); + simd VecInc(0, 1); for (int AI = 0; AI < ValuesToTrySize; AI++) { - using AType = - std::conditional_t, uint32_t>; - uint32_t AScalar = simd( - reinterpret_cast(ValuesToTryPtr) + AI)[0]; + using AType = std::conditional_t, T>; + T AScalar = simd(reinterpret_cast(ValuesToTryPtr) + AI)[0]; AType A = AScalar; if constexpr (AIsVector) A += VecInc; for (int BI = 0; BI < ValuesToTrySize; BI++) { - using BType = - std::conditional_t, uint32_t>; - uint32_t BScalar = simd( - reinterpret_cast(ValuesToTryPtr) + BI)[0]; + using BType = std::conditional_t, T>; + T BScalar = + simd(reinterpret_cast(ValuesToTryPtr) + BI)[0]; BType B = BScalar; if constexpr (BIsVector) B += VecInc; - using ResType = std::conditional_t, uint32_t>; + using ResType = + std::conditional_t, T>; ResType Carry = 0; ResType Res = addc(Carry, A, B); @@ -87,45 +118,47 @@ template bool test(sycl::queue Q) { Carry.copy_to(CarryMatrixPtr + (ValuesToTrySize * AI + BI) * N); Res.copy_to(ResultsMatrixPtr + (ValuesToTrySize * AI + BI) * N); } else { - simd Carry1 = Carry; - simd Res1 = Res; + simd Carry1 = Carry; + simd Res1 = Res; Carry1.copy_to(CarryMatrixPtr + (ValuesToTrySize * AI + BI) * N); Res1.copy_to(ResultsMatrixPtr + (ValuesToTrySize * AI + BI) * N); } } // end for BI - } // end for AI + } // end for AI }).wait(); } catch (sycl::exception const &e) { std::cout << "SYCL exception caught: " << e.what() << '\n'; - return 1; + return 0; } - using Result64T = uint64_t; + using ResultT = std::conditional_t< + 2 * sizeof(T) == 8, uint64_t, + std::conditional_t<2 * sizeof(T) == 16, __uint128_t, T>>; + int NumErrors = 0; for (int AI = 0; AI < ValuesToTrySize; AI++) { for (int BI = 0; BI < ValuesToTrySize; BI++) { for (int I = 0; I < N; I++) { - uint32_t A = ValuesToTryHost[AI]; + T A = ValuesToTryPtr[AI]; if constexpr (AIsVector) A += I; - uint32_t B = ValuesToTryHost[BI]; + T B = ValuesToTryPtr[BI]; if constexpr (BIsVector) B += I; - Result64T R = static_cast(A); - R += static_cast(B); - - uint32_t ExpectedRes = R & 0xffffffff; - uint32_t ExpectedCarry = (R >> 32) & 0xffffffff; - uint32_t ComputedRes = - ResultsMatrixPtr[(AI * ValuesToTrySize + BI) * N + I]; - uint32_t ComputedCarry = - CarryMatrixPtr[(AI * ValuesToTrySize + BI) * N + I]; + ResultT R = static_cast(A); + R += static_cast(B); + + T ExpectedRes = R & ~(T)(0); + T ExpectedCarry = (R >> (8 * sizeof(T))) & ~(T)(0); + T ComputedRes = ResultsMatrixPtr[(AI * ValuesToTrySize + BI) * N + I]; + T ComputedCarry = CarryMatrixPtr[(AI * ValuesToTrySize + BI) * N + I]; if (ComputedRes != ExpectedRes || ComputedCarry != ExpectedCarry) { std::cout << "Error for (" << AI << "," << BI << "): " << A << " + " << B << " is Computed(" << ComputedCarry << "," << ComputedRes << ") != Expected (" << ExpectedCarry << "," - << ExpectedRes << "), R = " << R << "\n"; + << ExpectedRes << ")" + << "\n"; NumErrors++; } } @@ -135,23 +168,29 @@ template bool test(sycl::queue Q) { return NumErrors == 0; } -int main() { - queue Q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler()); - auto D = Q.get_device(); - std::cout << "Running on " << D.get_info() << "\n"; - +template bool test(sycl::queue Q) { constexpr bool AIsVector = true; constexpr bool BIsVector = true; bool Pass = true; - Pass &= test<16, AIsVector, BIsVector>(Q); - Pass &= test<8, AIsVector, !BIsVector>(Q); - Pass &= test<4, !AIsVector, BIsVector>(Q); + Pass &= test(Q); + Pass &= test(Q); + Pass &= test(Q); + + Pass &= test(Q); + Pass &= test(Q); + Pass &= test(Q); + + Pass &= test(Q); + return Pass; +} - Pass &= test<1, AIsVector, BIsVector>(Q); - Pass &= test<1, AIsVector, !BIsVector>(Q); - Pass &= test<1, !AIsVector, BIsVector>(Q); +int main() { + queue Q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler()); + esimd_test::printTestLabel(Q); + bool Pass = true; - Pass &= test<1, !AIsVector, !BIsVector>(Q); + Pass &= test(Q); + Pass &= test(Q); std::cout << (Pass > 0 ? "Passed\n" : "FAILED\n"); return Pass ? 0 : 1; diff --git a/sycl/test-e2e/ESIMD/subb.cpp b/sycl/test-e2e/ESIMD/subb.cpp index 0174501b6cf02..67eeb6088feda 100644 --- a/sycl/test-e2e/ESIMD/subb.cpp +++ b/sycl/test-e2e/ESIMD/subb.cpp @@ -19,68 +19,99 @@ using namespace sycl; using namespace sycl::ext::intel::esimd; -template bool test(sycl::queue Q) { +template +bool test(sycl::queue Q) { static_assert(AIsVector || BIsVector || N == 1, "(Scalar - Scalar) case must have N==1"); - uint32_t ValuesToTryHost[] = {0, - 1, - static_cast(-1), - 0x7f, - static_cast(-0x7f), - 0x7fff, - static_cast(-0x7fff), - 0x7ffff, - static_cast(-0x7ffff), - 0x7ffffff, - 0x80, - static_cast(-0x80), - 0x8000, - static_cast(-0x8000), - 0x800000, - static_cast(-0x800000), - 0x80000000}; - ; - uint32_t ValuesToTrySize = sizeof(ValuesToTryHost) / sizeof(uint32_t); - - std::cout << "Running case: N=" << N << ", AIsVector=" << AIsVector - << ", BIsVector=" << BIsVector << std::endl; - - auto ValuesToTryUPtr = - esimd_test::usm_malloc_shared(Q, ValuesToTrySize); - uint32_t *ValuesToTryPtr = ValuesToTryUPtr.get(); - memcpy(ValuesToTryPtr, ValuesToTryHost, ValuesToTrySize * sizeof(uint32_t)); - - auto ResultsMatrixUPtr = esimd_test::usm_malloc_shared( + uint32_t ValuesToTryHost32[] = {0, + 1, + static_cast(-1), + 0x7f, + static_cast(-0x7f), + 0x7fff, + static_cast(-0x7fff), + 0x7ffff, + static_cast(-0x7ffff), + 0x7ffffff, + 0x80, + static_cast(-0x80), + 0x8000, + static_cast(-0x8000), + 0x800000, + static_cast(-0x800000), + 0x80000000}; + + uint64_t ValuesToTryHost64[] = {0, + 1, + static_cast(-1), + 0x7f, + static_cast(-0x7f), + 0x7fff, + static_cast(-0x7fff), + 0x7ffff, + static_cast(-0x7ffff), + 0x7ffffff, + static_cast(-0x7ffffff), + 0x7ffffffff, + static_cast(-0x7ffffffff), + 0x80, + static_cast(-0x80), + 0x8000, + static_cast(-0x8000), + 0x800000, + static_cast(-0x800000), + 0x80000000, + static_cast(-0x80000000), + 0x8000000000, + static_cast(-0x8000000000)}; + + uint32_t ValuesToTrySize = 0; + if constexpr (sizeof(T) == 4) { + ValuesToTrySize = sizeof(ValuesToTryHost32) / sizeof(T); + } else if constexpr (sizeof(T) == 8) { + ValuesToTrySize = sizeof(ValuesToTryHost64) / sizeof(T); + } + + std::cout << "Running case: T=" << esimd_test::type_name() << " N = " << N + << ", AIsVector = " << AIsVector << ", BIsVector=" << BIsVector + << std::endl; + + auto ValuesToTryUPtr = esimd_test::usm_malloc_shared(Q, ValuesToTrySize); + T *ValuesToTryPtr = ValuesToTryUPtr.get(); + if constexpr (sizeof(T) == 4) { + memcpy(ValuesToTryPtr, ValuesToTryHost32, ValuesToTrySize * sizeof(T)); + } else if constexpr (sizeof(T) == 8) { + memcpy(ValuesToTryPtr, ValuesToTryHost64, ValuesToTrySize * sizeof(T)); + } + + auto ResultsMatrixUPtr = esimd_test::usm_malloc_shared( Q, ValuesToTrySize * ValuesToTrySize * N); - auto BorrowMatrixUPtr = esimd_test::usm_malloc_shared( + auto BorrowMatrixUPtr = esimd_test::usm_malloc_shared( Q, ValuesToTrySize * ValuesToTrySize * N); - uint32_t *ResultsMatrixPtr = ResultsMatrixUPtr.get(); - uint32_t *BorrowMatrixPtr = BorrowMatrixUPtr.get(); + T *ResultsMatrixPtr = ResultsMatrixUPtr.get(); + T *BorrowMatrixPtr = BorrowMatrixUPtr.get(); try { Q.single_task([=]() SYCL_ESIMD_KERNEL { - simd VecInc(0, 1); + simd VecInc(0, 1); for (int AI = 0; AI < ValuesToTrySize; AI++) { - using AType = - std::conditional_t, uint32_t>; - uint32_t AScalar = simd( - reinterpret_cast(ValuesToTryPtr) + AI)[0]; + using AType = std::conditional_t, T>; + T AScalar = simd(reinterpret_cast(ValuesToTryPtr) + AI)[0]; AType A = AScalar; if constexpr (AIsVector) A += VecInc; for (int BI = 0; BI < ValuesToTrySize; BI++) { - using BType = - std::conditional_t, uint32_t>; - uint32_t BScalar = simd( - reinterpret_cast(ValuesToTryPtr) + BI)[0]; + using BType = std::conditional_t, T>; + T BScalar = + simd(reinterpret_cast(ValuesToTryPtr) + BI)[0]; BType B = BScalar; if constexpr (BIsVector) B += VecInc; - using ResType = std::conditional_t, uint32_t>; + using ResType = + std::conditional_t, T>; ResType Borrow = 0; ResType Res = subb(Borrow, A, B); @@ -88,35 +119,37 @@ template bool test(sycl::queue Q) { Borrow.copy_to(BorrowMatrixPtr + (ValuesToTrySize * AI + BI) * N); Res.copy_to(ResultsMatrixPtr + (ValuesToTrySize * AI + BI) * N); } else { - simd Borrow1 = Borrow; - simd Res1 = Res; + simd Borrow1 = Borrow; + simd Res1 = Res; Borrow1.copy_to(BorrowMatrixPtr + (ValuesToTrySize * AI + BI) * N); Res1.copy_to(ResultsMatrixPtr + (ValuesToTrySize * AI + BI) * N); } } // end for BI - } // end for AI + } // end for AI }).wait(); } catch (sycl::exception const &e) { std::cout << "SYCL exception caught: " << e.what() << '\n'; - return 1; + return 0; } - using Result64T = uint64_t; + using ResultT = std::conditional_t< + 2 * sizeof(T) == 8, uint64_t, + std::conditional_t<2 * sizeof(T) == 16, __uint128_t, T>>; int NumErrors = 0; for (int AI = 0; AI < ValuesToTrySize; AI++) { for (int BI = 0; BI < ValuesToTrySize; BI++) { for (int I = 0; I < N; I++) { - uint32_t A = ValuesToTryHost[AI]; + T A = ValuesToTryPtr[AI]; if constexpr (AIsVector) A += I; - uint32_t B = ValuesToTryHost[BI]; + T B = ValuesToTryPtr[BI]; if constexpr (BIsVector) B += I; - Result64T R = static_cast(A); - R -= static_cast(B); + ResultT R = static_cast(A); + R -= static_cast(B); - uint32_t ExpectedRes = R & 0xffffffff; + uint32_t ExpectedRes = R & ~(T)(0); uint32_t ExpectedBorrow = A < B; uint32_t ComputedRes = ResultsMatrixPtr[(AI * ValuesToTrySize + BI) * N + I]; @@ -127,7 +160,7 @@ template bool test(sycl::queue Q) { std::cout << "Error for (" << AI << "," << BI << "): " << std::hex << A << " - " << B << " is Computed(" << ComputedBorrow << "," << ComputedRes << ") != Expected (" << ExpectedBorrow - << "," << ExpectedRes << "), R = " << R << std::dec << "\n"; + << "," << ExpectedRes << ")" << std::dec << "\n"; NumErrors++; } } @@ -137,23 +170,30 @@ template bool test(sycl::queue Q) { return NumErrors == 0; } +template bool test(sycl::queue Q) { + constexpr bool AIsVector = true; + constexpr bool BIsVector = true; + bool Pass = true; + Pass &= test(Q); + Pass &= test(Q); + Pass &= test(Q); + + Pass &= test(Q); + Pass &= test(Q); + Pass &= test(Q); + + Pass &= test(Q); + return Pass; +} + int main() { queue Q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler()); auto D = Q.get_device(); std::cout << "Running on " << D.get_info() << "\n"; - constexpr bool AIsVector = true; - constexpr bool BIsVector = true; bool Pass = true; - Pass &= test<16, AIsVector, BIsVector>(Q); - Pass &= test<8, AIsVector, !BIsVector>(Q); - Pass &= test<4, !AIsVector, BIsVector>(Q); - - Pass &= test<1, AIsVector, BIsVector>(Q); - Pass &= test<1, AIsVector, !BIsVector>(Q); - Pass &= test<1, !AIsVector, BIsVector>(Q); - - Pass &= test<1, !AIsVector, !BIsVector>(Q); + Pass &= test(Q); + Pass &= test(Q); std::cout << (Pass > 0 ? "Passed\n" : "FAILED\n"); return Pass ? 0 : 1;