Skip to content

Commit 43f6332

Browse files
authored
[ESIMD] Allow full autodeduction for acc gather and slm_gather APIs accepting simd_view (#13956)
1 parent 0d1dd2d commit 43f6332

File tree

2 files changed

+441
-9
lines changed

2 files changed

+441
-9
lines changed

sycl/include/sycl/ext/intel/esimd/memory.hpp

Lines changed: 305 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4020,6 +4020,92 @@ gather(AccessorT acc, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
40204020
return gather<T, N, VS>(acc, byte_offsets.read(), mask, pass_thru, props);
40214021
}
40224022

4023+
/// template <int VS, typename T, int N, typename OffsetSimdViewT,
4024+
// typename PropertyListT = empty_properties_t>
4025+
/// simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets,
4026+
/// simd_mask<N / VS> mask, simd<T, N> pass_thru,
4027+
/// PropertyListT props = {});
4028+
/// This function is identical to (lacc-ga-1) except that the \p byte_offsets
4029+
/// is represented as \c simd_view.
4030+
template <
4031+
int VS, typename T, int N, typename AccessorT, typename OffsetSimdViewT,
4032+
typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
4033+
__ESIMD_API std::enable_if_t<
4034+
(detail::is_device_accessor_with_v<AccessorT,
4035+
detail::accessor_mode_cap::can_read> &&
4036+
detail::is_simd_view_type_v<OffsetSimdViewT> &&
4037+
ext::oneapi::experimental::is_property_list_v<PropertyListT>),
4038+
simd<T, N>>
4039+
gather(AccessorT acc, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
4040+
simd<T, N> pass_thru, PropertyListT props = {}) {
4041+
static_assert(N / VS ==
4042+
OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
4043+
"Size of pass_thru parameter must correspond to the size of "
4044+
"byte_offsets parameter.");
4045+
return gather<T, N, VS>(acc, byte_offsets.read(), mask, pass_thru, props);
4046+
}
4047+
4048+
/// template <int VS = 1, typename AccessorT,
4049+
/// typename OffsetSimdViewT, typename PassThruSimdViewT,
4050+
/// int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
4051+
/// typename T = PassThruSimdViewT::value_type::element_type,
4052+
/// typename PropertyListT = ext::oneapi::experimental::empty_properties_t>>
4053+
/// simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets,
4054+
/// simd_mask<N / VS> mask, PassThruSimdViewT pass_thru,
4055+
/// PropertyListT props = {});
4056+
/// This function is identical to (lacc-ga-1) except that the \p byte_offsets
4057+
/// and \p pass_thru are represented as \c simd_view.
4058+
template <
4059+
int VS = 1, typename AccessorT, typename OffsetSimdViewT,
4060+
typename PassThruSimdViewT,
4061+
int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
4062+
typename T = PassThruSimdViewT::value_type::element_type,
4063+
typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
4064+
__ESIMD_API std::enable_if_t<
4065+
(detail::is_device_accessor_with_v<AccessorT,
4066+
detail::accessor_mode_cap::can_read> &&
4067+
detail::is_simd_view_type_v<OffsetSimdViewT> &&
4068+
detail::is_simd_view_type_v<PassThruSimdViewT> &&
4069+
ext::oneapi::experimental::is_property_list_v<PropertyListT>),
4070+
simd<T, N>>
4071+
gather(AccessorT acc, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
4072+
PassThruSimdViewT pass_thru, PropertyListT props = {}) {
4073+
static_assert(N / VS ==
4074+
OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
4075+
"Size of pass_thru parameter must correspond to the size of "
4076+
"byte_offsets parameter.");
4077+
return gather<T, N, VS>(acc, byte_offsets.read(), mask, pass_thru.read(),
4078+
props);
4079+
}
4080+
4081+
/// template <int VS = 1, typename AccessorT,
4082+
/// typename OffsetT, typename PassThruSimdViewT,
4083+
/// int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
4084+
/// typename T = PassThruSimdViewT::value_type::element_type,
4085+
/// typename PropertyListT = ext::oneapi::experimental::empty_properties_t>>
4086+
/// simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets,
4087+
/// simd_mask<N / VS> mask, simd<T, N> pass_thru,
4088+
/// PropertyListT props = {});
4089+
/// This function is identical to (lacc-ga-1) except that the \p byte_offsets
4090+
/// is represented as \c simd_view.
4091+
template <
4092+
int VS = 1, typename AccessorT, typename OffsetT,
4093+
typename PassThruSimdViewT,
4094+
int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
4095+
typename T = PassThruSimdViewT::value_type::element_type,
4096+
typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
4097+
__ESIMD_API std::enable_if_t<
4098+
(detail::is_device_accessor_with_v<AccessorT,
4099+
detail::accessor_mode_cap::can_read> &&
4100+
detail::is_simd_view_type_v<PassThruSimdViewT> &&
4101+
ext::oneapi::experimental::is_property_list_v<PropertyListT>),
4102+
simd<T, N>>
4103+
gather(AccessorT acc, simd<OffsetT, N / VS> byte_offsets,
4104+
simd_mask<N / VS> mask, PassThruSimdViewT pass_thru,
4105+
PropertyListT props = {}) {
4106+
return gather<T, N, VS>(acc, byte_offsets, mask, pass_thru.read(), props);
4107+
}
4108+
40234109
/// template <typename T, int N, int VS = 1, typename AccessorT,
40244110
/// typename OffsetSimdViewT,
40254111
// typename PropertyListT = empty_properties_t>
@@ -5100,6 +5186,140 @@ slm_gather(OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
51005186
return slm_gather<T, N, VS>(byte_offsets.read(), mask, pass_thru, props);
51015187
}
51025188

5189+
/// template <int VS, typename T, int N, typename OffsetSimdViewT,
5190+
/// typename PropertyListT = empty_props_t>
5191+
/// simd <T, N> slm_gather(
5192+
/// OffsetSimdViewT byte_offsets,
5193+
/// simd_mask<N / VS> mask, simd<T, N> pass_thru,
5194+
/// PropertyListT props = {});
5195+
/// Variation of the API that allows to use \c simd_view without specifying \c T
5196+
/// and \c N template parameters.
5197+
/// Loads ("gathers") elements of the type 'T' from Shared Local Memory
5198+
/// locations addressed by byte offsets \p byte_offsets, and returns the loaded
5199+
/// elements. Access to any element's memory location can be disabled via the
5200+
/// input vector of predicates \p mask. If mask[i] is unset, then the load from
5201+
/// (byte_offsets[i]) is skipped and the corresponding i-th element from
5202+
/// \p pass_thru operand is returned.
5203+
/// @tparam VS Vector size. It can also be read as the number of reads per each
5204+
/// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
5205+
/// only on DG2 and PVC.
5206+
/// @param byte_offsets the vector of 32-bit offsets in bytes.
5207+
/// For each i, (byte_offsets[i]) must be element size aligned.
5208+
/// If the alignment property is not passed, then it is assumed that each
5209+
/// accessed address is aligned by element-size.
5210+
/// @param mask The access mask, defaults to all 1s.
5211+
/// @param pass_thru The vector pass through values.
5212+
/// @param props The optional compile-time properties. Only 'alignment'
5213+
/// property is used.
5214+
/// @return A vector of elements read.
5215+
template <
5216+
int VS, typename T, int N, typename OffsetSimdViewT,
5217+
typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
5218+
__ESIMD_API std::enable_if_t<
5219+
(detail::is_simd_view_type_v<OffsetSimdViewT> &&
5220+
ext::oneapi::experimental::is_property_list_v<PropertyListT>),
5221+
simd<T, N>>
5222+
slm_gather(OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
5223+
simd<T, N> pass_thru, PropertyListT props = {}) {
5224+
static_assert(N / VS ==
5225+
OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
5226+
"Size of pass_thru parameter must correspond to the size of "
5227+
"byte_offsets parameter.");
5228+
return slm_gather<T, N, VS>(byte_offsets.read(), mask, pass_thru, props);
5229+
}
5230+
5231+
/// template <int VS = 1,
5232+
/// typename OffsetSimdViewT, typename PassThruSimdViewT,
5233+
/// int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
5234+
/// typename T = PassThruSimdViewT::value_type::element_type,
5235+
/// typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
5236+
/// simd <T, N> slm_gather(
5237+
/// OffsetSimdViewT byte_offsets,
5238+
/// simd_mask<N / VS> mask, PassThruSimdViewT pass_thru,
5239+
/// PropertyListT props = {});
5240+
/// Variation of the API that allows to use \c simd_view without specifying \c T
5241+
/// and \c N template parameters.
5242+
/// Loads ("gathers") elements of the type 'T' from Shared Local Memory
5243+
/// locations addressed by byte offsets \p byte_offsets, and returns the loaded
5244+
/// elements. Access to any element's memory location can be disabled via the
5245+
/// input vector of predicates \p mask. If mask[i] is unset, then the load from
5246+
/// (byte_offsets[i]) is skipped and the corresponding i-th element from
5247+
/// \p pass_thru operand is returned.
5248+
/// @tparam VS Vector size. It can also be read as the number of reads per each
5249+
/// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
5250+
/// only on DG2 and PVC.
5251+
/// @param byte_offsets the vector of 32-bit offsets in bytes.
5252+
/// For each i, (byte_offsets[i]) must be element size aligned.
5253+
/// If the alignment property is not passed, then it is assumed that each
5254+
/// accessed address is aligned by element-size.
5255+
/// @param mask The access mask, defaults to all 1s.
5256+
/// @param pass_thru The vector pass through values.
5257+
/// @param props The optional compile-time properties. Only 'alignment'
5258+
/// property is used.
5259+
/// @return A vector of elements read.
5260+
template <
5261+
int VS = 1, typename OffsetSimdViewT, typename PassThruSimdViewT,
5262+
int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
5263+
typename T = PassThruSimdViewT::value_type::element_type,
5264+
typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
5265+
__ESIMD_API std::enable_if_t<
5266+
(detail::is_simd_view_type_v<OffsetSimdViewT> &&
5267+
detail::is_simd_view_type_v<PassThruSimdViewT> &&
5268+
ext::oneapi::experimental::is_property_list_v<PropertyListT>),
5269+
simd<T, N>>
5270+
slm_gather(OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
5271+
PassThruSimdViewT pass_thru, PropertyListT props = {}) {
5272+
static_assert(N / VS ==
5273+
OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
5274+
"Size of pass_thru parameter must correspond to the size of "
5275+
"byte_offsets parameter.");
5276+
return slm_gather<T, N, VS>(byte_offsets.read(), mask, pass_thru.read(),
5277+
props);
5278+
}
5279+
5280+
/// template <int VS = 1,
5281+
/// typename PassThruSimdViewT,
5282+
/// int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
5283+
/// typename T = PassThruSimdViewT::value_type::element_type,
5284+
/// typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
5285+
/// simd <T, N> slm_gather(
5286+
/// OffsetSimdViewT byte_offsets,
5287+
/// simd_mask<N / VS> mask, PassThruSimdViewT pass_thru,
5288+
/// PropertyListT props = {});
5289+
/// Variation of the API that allows to use \c simd_view without specifying \c T
5290+
/// and \c N template parameters.
5291+
/// Loads ("gathers") elements of the type 'T' from Shared Local Memory
5292+
/// locations addressed by byte offsets \p byte_offsets, and returns the loaded
5293+
/// elements. Access to any element's memory location can be disabled via the
5294+
/// input vector of predicates \p mask. If mask[i] is unset, then the load from
5295+
/// (byte_offsets[i]) is skipped and the corresponding i-th element from
5296+
/// \p pass_thru operand is returned.
5297+
/// @tparam VS Vector size. It can also be read as the number of reads per each
5298+
/// address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
5299+
/// only on DG2 and PVC.
5300+
/// @param byte_offsets the vector of 32-bit offsets in bytes.
5301+
/// For each i, (byte_offsets[i]) must be element size aligned.
5302+
/// If the alignment property is not passed, then it is assumed that each
5303+
/// accessed address is aligned by element-size.
5304+
/// @param mask The access mask, defaults to all 1s.
5305+
/// @param pass_thru The vector pass through values.
5306+
/// @param props The optional compile-time properties. Only 'alignment'
5307+
/// property is used.
5308+
/// @return A vector of elements read.
5309+
template <
5310+
int VS = 1, typename PassThruSimdViewT,
5311+
int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
5312+
typename T = PassThruSimdViewT::value_type::element_type,
5313+
typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
5314+
__ESIMD_API std::enable_if_t<
5315+
(detail::is_simd_view_type_v<PassThruSimdViewT> &&
5316+
ext::oneapi::experimental::is_property_list_v<PropertyListT>),
5317+
simd<T, N>>
5318+
slm_gather(simd<uint32_t, N / VS> byte_offsets, simd_mask<N / VS> mask,
5319+
PassThruSimdViewT pass_thru, PropertyListT props = {}) {
5320+
return slm_gather<T, N, VS>(byte_offsets, mask, pass_thru.read(), props);
5321+
}
5322+
51035323
/// simd <T, N> slm_gather(
51045324
/// OffsetSimdViewT byte_offsets,
51055325
/// simd_mask<N / VS> mask, PropertyListT props = {}); // (slm-ga-8)
@@ -8848,6 +9068,91 @@ gather(AccessorT acc, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
88489068
return gather<T, N, VS>(acc, byte_offsets.read(), mask, pass_thru, props);
88499069
}
88509070

9071+
/// template <int VS, typename T, int N, typename OffsetSimdViewT,
9072+
// typename PropertyListT = empty_properties_t>
9073+
/// simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets,
9074+
/// simd_mask<N / VS> mask, simd<T, N> pass_thru,
9075+
/// PropertyListT props = {});
9076+
/// This function is identical to (lacc-ga-1) except that the \p byte_offsets
9077+
/// is represented as \c simd_view.
9078+
template <
9079+
int VS, typename T, int N, typename AccessorT, typename OffsetSimdViewT,
9080+
typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
9081+
__ESIMD_API std::enable_if_t<
9082+
(detail::is_local_accessor_with_v<AccessorT,
9083+
detail::accessor_mode_cap::can_read> &&
9084+
detail::is_simd_view_type_v<OffsetSimdViewT> &&
9085+
ext::oneapi::experimental::is_property_list_v<PropertyListT>),
9086+
simd<T, N>>
9087+
gather(AccessorT acc, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
9088+
simd<T, N> pass_thru, PropertyListT props = {}) {
9089+
static_assert(N / VS ==
9090+
OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
9091+
"Size of pass_thru parameter must correspond to the size of "
9092+
"byte_offsets parameter.");
9093+
return gather<T, N, VS>(acc, byte_offsets.read(), mask, pass_thru, props);
9094+
}
9095+
9096+
/// template <int VS = 1, typename AccessorT,
9097+
/// typename OffsetSimdViewT, typename PassThruSimdViewT,
9098+
/// int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
9099+
/// typename T = PassThruSimdViewT::value_type::element_type,
9100+
/// typename PropertyListT = ext::oneapi::experimental::empty_properties_t>>
9101+
/// simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets,
9102+
/// simd_mask<N / VS> mask, PassThruSimdViewT pass_thru,
9103+
/// PropertyListT props = {});
9104+
/// This function is identical to (lacc-ga-1) except that the \p byte_offsets
9105+
/// and \p pass_thru are represented as \c simd_view.
9106+
template <
9107+
int VS = 1, typename AccessorT, typename OffsetSimdViewT,
9108+
typename PassThruSimdViewT,
9109+
int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
9110+
typename T = PassThruSimdViewT::value_type::element_type,
9111+
typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
9112+
__ESIMD_API std::enable_if_t<
9113+
(detail::is_local_accessor_with_v<AccessorT,
9114+
detail::accessor_mode_cap::can_read> &&
9115+
detail::is_simd_view_type_v<OffsetSimdViewT> &&
9116+
detail::is_simd_view_type_v<PassThruSimdViewT> &&
9117+
ext::oneapi::experimental::is_property_list_v<PropertyListT>),
9118+
simd<T, N>>
9119+
gather(AccessorT acc, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
9120+
PassThruSimdViewT pass_thru, PropertyListT props = {}) {
9121+
static_assert(N / VS ==
9122+
OffsetSimdViewT::getSizeX() * OffsetSimdViewT::getSizeY(),
9123+
"Size of pass_thru parameter must correspond to the size of "
9124+
"byte_offsets parameter.");
9125+
return gather<T, N, VS>(acc, byte_offsets.read(), mask, pass_thru.read(),
9126+
props);
9127+
}
9128+
9129+
/// template <int VS = 1, typename AccessorT,
9130+
/// typename PassThruSimdViewT,
9131+
/// int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
9132+
/// typename T = PassThruSimdViewT::value_type::element_type,
9133+
/// typename PropertyListT = ext::oneapi::experimental::empty_properties_t>>
9134+
/// simd<T, N> gather(AccessorT acc, simd<uint32_t, N / VS> byte_offsets,
9135+
/// simd_mask<N / VS> mask, simd<T, N> pass_thru,
9136+
/// PropertyListT props = {});
9137+
/// This function is identical to (lacc-ga-1) except that the \p pass_thru
9138+
/// is represented as \c simd_view.
9139+
template <
9140+
int VS = 1, typename AccessorT, typename PassThruSimdViewT,
9141+
int N = PassThruSimdViewT::getSizeX() * PassThruSimdViewT::getSizeY(),
9142+
typename T = PassThruSimdViewT::value_type::element_type,
9143+
typename PropertyListT = ext::oneapi::experimental::empty_properties_t>
9144+
__ESIMD_API std::enable_if_t<
9145+
(detail::is_local_accessor_with_v<AccessorT,
9146+
detail::accessor_mode_cap::can_read> &&
9147+
detail::is_simd_view_type_v<PassThruSimdViewT> &&
9148+
ext::oneapi::experimental::is_property_list_v<PropertyListT>),
9149+
simd<T, N>>
9150+
gather(AccessorT acc, simd<uint32_t, N / VS> byte_offsets,
9151+
simd_mask<N / VS> mask, PassThruSimdViewT pass_thru,
9152+
PropertyListT props = {}) {
9153+
return gather<T, N, VS>(acc, byte_offsets, mask, pass_thru.read(), props);
9154+
}
9155+
88519156
/// template <typename T, int N, int VS = 1, typename AccessorT,
88529157
/// typename OffsetSimdViewT,
88539158
// typename PropertyListT = empty_properties_t>

0 commit comments

Comments
 (0)