Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
608bb81
Replace use of intrinsics with spirv functions for addc/subb
fineg74 Mar 21, 2024
608721d
Restore lowering of esimd intrinsics
fineg74 Apr 23, 2024
52ceded
Refactor implementation
fineg74 Apr 23, 2024
7b90096
Update tests
fineg74 Apr 23, 2024
0e8b39a
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 Apr 23, 2024
875bcdb
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 Apr 25, 2024
9d9efa7
Remove support for 8/16 bit data
fineg74 Apr 25, 2024
9b953dd
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 Apr 25, 2024
9bbb5d2
Merge branch 'addc' of https://github.com/fineg74/llvm into addc
fineg74 Apr 25, 2024
ad0cb74
Fix clang-format issue
fineg74 Apr 25, 2024
a30f641
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 Apr 26, 2024
7b60952
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 May 2, 2024
d9a395c
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 May 6, 2024
e596e18
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 May 14, 2024
5cee161
Fix clang-format and a build break
fineg74 May 14, 2024
1f4124f
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 May 29, 2024
43ecf76
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 May 31, 2024
a26049a
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 Jul 7, 2024
1a3e5f6
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 Jul 17, 2024
6e32c41
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 Jul 30, 2024
5d48b4e
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 Aug 2, 2024
c6e9852
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 Aug 14, 2024
a9eff15
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 Aug 23, 2024
5f99e62
Fix test issue
fineg74 Aug 23, 2024
257bad9
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 Sep 11, 2024
a2a2b5d
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 Oct 17, 2024
b7a027d
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 Oct 23, 2024
82702a5
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 Nov 12, 2024
ffe5c8f
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 Dec 2, 2024
5a949e1
Merge remote-tracking branch 'origin/sycl' into addc
fineg74 Dec 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 3 additions & 42 deletions llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -653,8 +653,6 @@ class ESIMDIntrinDescTable {
{"__spirv_ConvertFToBF16INTEL", {a(0)}}},
{"__devicelib_ConvertBF16ToFINTEL",
{"__spirv_ConvertBF16ToFINTEL", {a(0)}}},
{"addc", {"addc", {l(0)}}},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's keep the lowering in LowerESIMD.cpp for a while - see the reasoning in other PR: #12935

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

{"subb", {"subb", {l(0)}}},
{"bfn", {"bfn", {a(0), a(1), a(2), t(0)}}},
{"srnd", {"srnd", {a(0), a(1)}}},
{"timestamp",{"timestamp",{}}}};
Expand All @@ -664,13 +662,6 @@ class ESIMDIntrinDescTable {
const IntrinTable &getTable() { return Table; }
};

static bool isStructureReturningFunction(StringRef FunctionName) {
return llvm::StringSwitch<bool>(FunctionName)
.Case("addc", true)
.Case("subb", true)
.Default(false);
}

// The C++11 "magic static" idiom to lazily initialize the ESIMD intrinsic table
static const IntrinTable &getIntrinTable() {
static ESIMDIntrinDescTable TheTable;
Expand Down Expand Up @@ -1417,8 +1408,6 @@ static void translateESIMDIntrinsicCall(CallInst &CI) {
SmallVector<Value *, 16> GenXArgs;
createESIMDIntrinsicArgs(Desc, GenXArgs, CI, FE);
Function *NewFDecl = nullptr;
bool DoesFunctionReturnStructure =
isStructureReturningFunction(Desc.GenXSpelling);
if (isDevicelibFunction(F->getName())) {
NewFDecl = createDeviceLibESIMDDeclaration(Desc, GenXArgs, CI);
} else if (Desc.GenXSpelling.rfind("test.src.", 0) == 0) {
Expand All @@ -1429,17 +1418,8 @@ static void translateESIMDIntrinsicCall(CallInst &CI) {
GenXIntrinsic::getGenXIntrinsicPrefix() + Desc.GenXSpelling + Suffix);

SmallVector<Type *, 16> GenXOverloadedTypes;
if (GenXIntrinsic::isOverloadedRet(ID)) {
if (DoesFunctionReturnStructure) {
// TODO implement more generic handling of returned structure
// current code assumes that returned code has 2 members of the
// same type as arguments.
GenXOverloadedTypes.push_back(GenXArgs[1]->getType());
GenXOverloadedTypes.push_back(GenXArgs[1]->getType());
} else {
GenXOverloadedTypes.push_back(CI.getType());
}
}
if (GenXIntrinsic::isOverloadedRet(ID))
GenXOverloadedTypes.push_back(CI.getType());
for (unsigned i = 0; i < GenXArgs.size(); ++i)
if (GenXIntrinsic::isOverloadedArg(ID, i))
GenXOverloadedTypes.push_back(GenXArgs[i]->getType());
Expand All @@ -1449,31 +1429,12 @@ static void translateESIMDIntrinsicCall(CallInst &CI) {
}

// llvm::Attribute::ReadNone must not be used for call statements anymore.
Instruction *NewInst = nullptr;
AddrSpaceCastInst *CastInstruction = nullptr;
if (DoesFunctionReturnStructure) {
llvm::esimd::assert_and_diag(
isa<AddrSpaceCastInst>(GenXArgs[0]),
"Unexpected instruction for returning a structure from a function.");
CastInstruction = static_cast<AddrSpaceCastInst *>(GenXArgs[0]);
// Remove 1st argument that is used to return the structure
GenXArgs.erase(GenXArgs.begin());
}

CallInst *NewCI = IntrinsicInst::Create(
NewFDecl, GenXArgs,
NewFDecl->getReturnType()->isVoidTy() ? "" : CI.getName() + ".esimd",
&CI);
NewCI->setDebugLoc(CI.getDebugLoc());
if (DoesFunctionReturnStructure) {
IRBuilder<> Builder(&CI);

NewInst = Builder.CreateStore(
NewCI, Builder.CreateBitCast(CastInstruction->getPointerOperand(),
NewCI->getType()->getPointerTo()));
} else {
NewInst = addCastInstIfNeeded(&CI, NewCI);
}
Instruction *NewInst = addCastInstIfNeeded(&CI, NewCI);

CI.replaceAllUsesWith(NewInst);
CI.eraseFromParent();
Expand Down
8 changes: 8 additions & 0 deletions sycl/include/CL/__spirv/spirv_ops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1286,6 +1286,14 @@ extern __DPCPP_SYCL_EXTERNAL RetT __spirv_TaskSequenceGetINTEL(
extern __DPCPP_SYCL_EXTERNAL void __spirv_TaskSequenceReleaseINTEL(
__spv::__spirv_TaskSequenceINTEL *TaskSequence) noexcept;

template <typename T, int N>
extern __DPCPP_SYCL_EXTERNAL std::pair<__ocl_vec_t<T, N>, __ocl_vec_t<T, N>>
__spirv_IAddCarry(__ocl_vec_t<T, N> src0, __ocl_vec_t<T, N> src1);

template <typename T, int N>
extern __DPCPP_SYCL_EXTERNAL std::pair<__ocl_vec_t<T, N>, __ocl_vec_t<T, N>>
__spirv_ISubBorrow(__ocl_vec_t<T, N> src0, __ocl_vec_t<T, N> src1);

#else // if !__SYCL_DEVICE_ONLY__

template <typename dataT>
Expand Down
151 changes: 81 additions & 70 deletions sycl/include/sycl/ext/intel/esimd/math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,10 @@ __esimd_abs_common_internal(simd<TArg, SZ> src0) {
}

template <typename TRes, typename TArg>
ESIMD_NODEBUG ESIMD_INLINE
std::enable_if_t<detail::is_esimd_scalar<TRes>::value &&
detail::is_esimd_scalar<TArg>::value,
TRes>
ESIMD_NODEBUG
ESIMD_INLINE std::enable_if_t<detail::is_esimd_scalar<TRes>::value &&
detail::is_esimd_scalar<TArg>::value,
TRes>
__esimd_abs_common_internal(TArg src0) {
simd<TArg, 1> Src0 = src0;
simd<TArg, 1> Result = __esimd_abs_common_internal<TArg>(Src0);
Expand Down Expand Up @@ -641,8 +641,8 @@ __ESIMD_API RT trunc(float src0, Sat sat = {}) {
/// @param src0 The input mask.
/// @return The packed mask as an <code>unsigned int</code> 32-bit value.
template <int N>
ESIMD_NODEBUG ESIMD_INLINE
std::enable_if_t<(N == 8 || N == 16 || N == 32), uint>
ESIMD_NODEBUG
ESIMD_INLINE std::enable_if_t<(N == 8 || N == 16 || N == 32), uint>
pack_mask(simd_mask<N> src0) {
return __esimd_pack_mask<N>(src0.data());
}
Expand All @@ -655,8 +655,8 @@ ESIMD_NODEBUG ESIMD_INLINE
/// @param src0 The input packed mask.
/// @return The unpacked mask as a simd_mask object.
template <int N>
ESIMD_NODEBUG ESIMD_INLINE
std::enable_if_t<(N == 8 || N == 16 || N == 32), simd_mask<N>>
ESIMD_NODEBUG
ESIMD_INLINE std::enable_if_t<(N == 8 || N == 16 || N == 32), simd_mask<N>>
unpack_mask(uint src0) {
return __esimd_unpack_mask<N>(src0);
}
Expand Down Expand Up @@ -698,10 +698,9 @@ ballot(simd<T, N> mask) {
/// @return a vector of \c uint32_t, where each element is set to bit count of
/// the corresponding element of the source operand.
template <typename T, int N>
ESIMD_NODEBUG ESIMD_INLINE
std::enable_if_t<std::is_integral<T>::value && (sizeof(T) <= 4),
simd<uint32_t, N>>
cbit(simd<T, N> src) {
ESIMD_NODEBUG ESIMD_INLINE std::enable_if_t<
std::is_integral<T>::value && (sizeof(T) <= 4), simd<uint32_t, N>>
cbit(simd<T, N> src) {
return __esimd_cbit<T, N>(src.data());
}

Expand Down Expand Up @@ -1176,128 +1175,140 @@ bfn(T src0, T src1, T src2) {

/// @} sycl_esimd_logical

/// Performs add with carry of 2 unsigned 32-bit vectors.
/// Performs add with carry of 2 unsigned integral vectors.
/// @tparam N size of the vectors
/// @param carry vector that is going to hold resulting carry flag
/// @param src0 first term
/// @param src1 second term
/// @return sum of 2 terms, carry flag is returned through \c carry parameter
template <int N>
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
addc(__ESIMD_NS::simd<uint32_t, N> &carry, __ESIMD_NS::simd<uint32_t, N> src0,
__ESIMD_NS::simd<uint32_t, N> src1) {
std::pair<__ESIMD_DNS::vector_type_t<uint32_t, N>,
__ESIMD_DNS::vector_type_t<uint32_t, N>>
Result = __esimd_addc<uint32_t, N>(src0.data(), src1.data());

carry = Result.first;
return Result.second;
template <int N, typename T>
__ESIMD_API std::enable_if_t<!std::is_signed_v<T> && std::is_integral_v<T>,
__ESIMD_NS::simd<T, N>>
addc(__ESIMD_NS::simd<T, N> &carry, __ESIMD_NS::simd<T, N> src0,
__ESIMD_NS::simd<T, N> src1) {
#ifdef __SYCL_DEVICE_ONLY__
std::pair<__ESIMD_DNS::vector_type_t<T, N>, __ESIMD_DNS::vector_type_t<T, N>>
Result = __spirv_IAddCarry<T, N>(src0.data(), src1.data());

carry = Result.second;
return Result.first;
#else
return 0;
#endif // __SYCL_DEVICE_ONLY__
}

/// Performs add with carry of an unsigned 32-bit vector and scalar.
/// Performs add with carry of an unsigned integral vector and scalar.
/// @tparam N size of the vectors
/// @param carry vector that is going to hold resulting carry flag
/// @param src0 first term
/// @param src1 second term
/// @return sum of 2 terms, carry flag is returned through \c carry parameter
template <int N>
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
addc(__ESIMD_NS::simd<uint32_t, N> &carry, __ESIMD_NS::simd<uint32_t, N> src0,
uint32_t src1) {
__ESIMD_NS::simd<uint32_t, N> Src1V = src1;
template <int N, typename T>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe a macro would make this better: a) there will be no need for SFINAE, and b) the templates would not require the extra parameter T, which comes in a very unusual order - after N (the usual order is <T, N>). Finally, passing a simd_view as the first argument to the vector+scalar op will require specifying only N, instead of <N, T>.
The idea is as follows (it may need some adaptation because of the use of __SYCL_DEVICE_ONLY__ inside it):

#define __ESIMD_ADDC(T) \
template <int N> \
__ESIMD_API __ESIMD_NS::simd<T, N>> \
addc(__ESIMD_NS::simd<T, N> &carry, __ESIMD_NS::simd<T, N> src0, \
     __ESIMD_NS::simd<T, N> src1) { \
#ifdef __SYCL_DEVICE_ONLY__ \
  std::pair<__ESIMD_DNS::vector_type_t<T, N>, __ESIMD_DNS::vector_type_t<T, N>> \
      Result = __spirv_IAddCarry<T, N>(src0.data(), src1.data()); \
  carry = Result.second; \
  return Result.first; \
#else \
  return 0; \
#endif // __SYCL_DEVICE_ONLY__ \
} \
/* OTHER 3 variants here (vec+scalar, scalar+vec, scalar+scalar) */ \

__ESIMD_ADDC(uint8_t)
__ESIMD_ADDC(uint16_t)
__ESIMD_ADDC(uint32_t)
__ESIMD_ADDC(uint64_t)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

__ESIMD_API std::enable_if_t<!std::is_signed_v<T> && std::is_integral_v<T>,
__ESIMD_NS::simd<T, N>>
addc(__ESIMD_NS::simd<T, N> &carry, __ESIMD_NS::simd<T, N> src0, T src1) {
__ESIMD_NS::simd<T, N> Src1V = src1;
return addc(carry, src0, Src1V);
}

/// Performs add with carry of an unsigned 32-bit scalar and vector.
/// Performs add with carry of an unsigned integral scalar and vector.
/// @tparam N size of the vectors
/// @param carry vector that is going to hold resulting carry flag
/// @param src0 first term
/// @param src1 second term
/// @return sum of 2 terms, carry flag is returned through \c carry parameter
template <int N>
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
addc(__ESIMD_NS::simd<uint32_t, N> &carry, uint32_t src0,
__ESIMD_NS::simd<uint32_t, N> src1) {
__ESIMD_NS::simd<uint32_t, N> Src0V = src0;
template <int N, typename T>
__ESIMD_API std::enable_if_t<!std::is_signed_v<T> && std::is_integral_v<T>,
__ESIMD_NS::simd<T, N>>
addc(__ESIMD_NS::simd<T, N> &carry, T src0, __ESIMD_NS::simd<T, N> src1) {
__ESIMD_NS::simd<T, N> Src0V = src0;
return addc(carry, Src0V, src1);
}

/// Performs add with carry of 2 unsigned 32-bit scalars.
/// Performs add with carry of 2 unsigned integral scalars.
/// @tparam N size of the vectors
/// @param carry scalar that is going to hold resulting carry flag
/// @param src0 first term
/// @param src1 second term
/// @return sum of 2 terms, carry flag is returned through \c carry parameter
__ESIMD_API uint32_t addc(uint32_t &carry, uint32_t src0, uint32_t src1) {
__ESIMD_NS::simd<uint32_t, 1> CarryV = carry;
__ESIMD_NS::simd<uint32_t, 1> Src0V = src0;
__ESIMD_NS::simd<uint32_t, 1> Src1V = src1;
__ESIMD_NS::simd<uint32_t, 1> Res = addc(CarryV, Src0V, Src1V);
template <typename T>
__ESIMD_API std::enable_if_t<!std::is_signed_v<T> && std::is_integral_v<T>, T>
addc(T &carry, T src0, T src1) {
__ESIMD_NS::simd<T, 1> CarryV = carry;
__ESIMD_NS::simd<T, 1> Src0V = src0;
__ESIMD_NS::simd<T, 1> Src1V = src1;
__ESIMD_NS::simd<T, 1> Res = addc(CarryV, Src0V, Src1V);
carry = CarryV[0];
return Res[0];
}

/// Performs subtraction with borrow of 2 unsigned 32-bit vectors.
/// Performs subtraction with borrow of 2 unsigned integral vectors.
/// @tparam N size of the vectors
/// @param borrow vector that is going to hold resulting borrow flag
/// @param src0 first term
/// @param src1 second term
/// @return difference of 2 terms, borrow flag is returned through \c borrow
/// parameter
template <int N>
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
subb(__ESIMD_NS::simd<uint32_t, N> &borrow, __ESIMD_NS::simd<uint32_t, N> src0,
__ESIMD_NS::simd<uint32_t, N> src1) {
std::pair<__ESIMD_DNS::vector_type_t<uint32_t, N>,
__ESIMD_DNS::vector_type_t<uint32_t, N>>
Result = __esimd_subb<uint32_t, N>(src0.data(), src1.data());

borrow = Result.first;
return Result.second;
template <int N, typename T>
__ESIMD_API std::enable_if_t<!std::is_signed_v<T> && std::is_integral_v<T>,
__ESIMD_NS::simd<T, N>>
subb(__ESIMD_NS::simd<T, N> &borrow, __ESIMD_NS::simd<T, N> src0,
__ESIMD_NS::simd<T, N> src1) {
#ifdef __SYCL_DEVICE_ONLY__
std::pair<__ESIMD_DNS::vector_type_t<T, N>, __ESIMD_DNS::vector_type_t<T, N>>
Result = __spirv_ISubBorrow<T, N>(src0.data(), src1.data());

borrow = Result.second;
return Result.first;
#else
return 0;
#endif // __SYCL_DEVICE_ONLY__
}

/// Performs subtraction with borrow of unsigned 32-bit vector and scalar.
/// Performs subtraction with borrow of unsigned integral vector and scalar.
/// @tparam N size of the vectors
/// @param borrow vector that is going to hold resulting borrow flag
/// @param src0 first term
/// @param src1 second term
/// @return difference of 2 terms, borrow flag is returned through \c borrow
/// parameter
template <int N>
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
subb(__ESIMD_NS::simd<uint32_t, N> &borrow, __ESIMD_NS::simd<uint32_t, N> src0,
uint32_t src1) {
__ESIMD_NS::simd<uint32_t, N> Src1V = src1;
template <int N, typename T>
__ESIMD_API std::enable_if_t<!std::is_signed_v<T> && std::is_integral_v<T>,
__ESIMD_NS::simd<T, N>>
subb(__ESIMD_NS::simd<T, N> &borrow, __ESIMD_NS::simd<T, N> src0, T src1) {
__ESIMD_NS::simd<T, N> Src1V = src1;
return subb(borrow, src0, Src1V);
}

/// Performs subtraction with borrow of unsigned 32-bit scalar and vector.
/// Performs subtraction with borrow of unsigned integral scalar and vector.
/// @tparam N size of the vectors
/// @param borrow vector that is going to hold resulting borrow flag
/// @param src0 first term
/// @param src1 second term
/// @return difference of 2 terms, borrow flag is returned through \c borrow
/// parameter
template <int N>
__ESIMD_API __ESIMD_NS::simd<uint32_t, N>
subb(__ESIMD_NS::simd<uint32_t, N> &borrow, uint32_t src0,
__ESIMD_NS::simd<uint32_t, N> src1) {
__ESIMD_NS::simd<uint32_t, N> Src0V = src0;
template <int N, typename T>
__ESIMD_API std::enable_if_t<!std::is_signed_v<T> && std::is_integral_v<T>,
__ESIMD_NS::simd<T, N>>
subb(__ESIMD_NS::simd<T, N> &borrow, T src0, __ESIMD_NS::simd<T, N> src1) {
__ESIMD_NS::simd<T, N> Src0V = src0;
return subb(borrow, Src0V, src1);
}

/// Performs subtraction with borrow of 2 unsigned 32-bit scalars.
/// Performs subtraction with borrow of 2 unsigned integral scalars.
/// @tparam N size of the vectors
/// @param borrow scalar that is going to hold resulting borrow flag
/// @param src0 first term
/// @param src1 second term
/// @return difference of 2 terms, borrow flag is returned through \c borrow
/// parameter
__ESIMD_API uint32_t subb(uint32_t &borrow, uint32_t src0, uint32_t src1) {
__ESIMD_NS::simd<uint32_t, 1> BorrowV = borrow;
__ESIMD_NS::simd<uint32_t, 1> Src0V = src0;
__ESIMD_NS::simd<uint32_t, 1> Src1V = src1;
__ESIMD_NS::simd<uint32_t, 1> Res = subb(BorrowV, Src0V, Src1V);
template <typename T>
__ESIMD_API std::enable_if_t<!std::is_signed_v<T> && std::is_integral_v<T>, T>
subb(T &borrow, T src0, T src1) {
__ESIMD_NS::simd<T, 1> BorrowV = borrow;
__ESIMD_NS::simd<T, 1> Src0V = src0;
__ESIMD_NS::simd<T, 1> Src1V = src1;
__ESIMD_NS::simd<T, 1> Res = subb(BorrowV, Src0V, Src1V);
borrow = BorrowV[0];
return Res[0];
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,18 +128,6 @@ __ESIMD_INTRIN __ESIMD_DNS::vector_type_t<T, N> __esimd_dpasw_nosrc0(
__ESIMD_DNS::vector_type_t<T1, N1> src1,
__ESIMD_DNS::vector_type_t<T2, N2> src2) __ESIMD_INTRIN_END;

template <typename T, int N>
__ESIMD_INTRIN std::pair<__ESIMD_DNS::vector_type_t<T, N>,
__ESIMD_DNS::vector_type_t<T, N>>
__esimd_addc(__ESIMD_DNS::vector_type_t<T, N> src0,
__ESIMD_DNS::vector_type_t<T, N> src1) __ESIMD_INTRIN_END;

template <typename T, int N>
__ESIMD_INTRIN std::pair<__ESIMD_DNS::vector_type_t<T, N>,
__ESIMD_DNS::vector_type_t<T, N>>
__esimd_subb(__ESIMD_DNS::vector_type_t<T, N> src0,
__ESIMD_DNS::vector_type_t<T, N> src1) __ESIMD_INTRIN_END;

template <uint8_t FuncControl, typename T, int N>
__ESIMD_INTRIN __ESIMD_raw_vec_t(T, N)
__esimd_bfn(__ESIMD_raw_vec_t(T, N) src0, __ESIMD_raw_vec_t(T, N) src1,
Expand Down