@@ -1891,10 +1891,12 @@ template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
18911891 T, NBlocks, BlockHeight, BlockWidth, Transposed, Transformed>()>
18921892ESIMD_INLINE SYCL_ESIMD_FUNCTION __ESIMD_NS::simd<T, N> lsc_load_2d (
18931893 config_2d_mem_access<T, BlockWidth, BlockHeight, NBlocks> &payload) {
1894+ using RawT = __ESIMD_DNS::__raw_t <T>;
18941895 __ESIMD_DNS::check_lsc_block_2d_restrictions<
1895- T , BlockWidth, BlockHeight, NBlocks, Transposed, Transformed,
1896+ RawT , BlockWidth, BlockHeight, NBlocks, Transposed, Transformed,
18961897 __ESIMD_DNS::block_2d_op::load>();
18971898 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1899+ using CacheVectorT = __ESIMD_DNS::vector_type_t <uint8_t , 2 >;
18981900 __ESIMD_DNS::check_cache_hints<__ESIMD_DNS::cache_action::load,
18991901 PropertyListT>();
19001902 constexpr int ElemsPerDword = 4 / sizeof (T);
@@ -1920,27 +1922,28 @@ ESIMD_INLINE SYCL_ESIMD_FUNCTION __ESIMD_NS::simd<T, N> lsc_load_2d(
19201922 __ESIMD_DNS::roundUpNextMultiple<DstElements * sizeof (T), GrfBytes>();
19211923 constexpr uint32_t DstLength =
19221924 (DstBlockSize / GrfBytes) > 31 ? 31 : (DstBlockSize / GrfBytes);
1923- constexpr uint32_t DstLengthMask = DstLength << 20 ;
19241925
19251926 static_assert (N == ActualN || N == DstElements, " Incorrect element count" );
19261927
1927- constexpr uint32_t cache_mask = detail::get_lsc_load_cache_mask<L1H, L2H>()
1928- << 17 ;
1929- constexpr uint32_t base_desc = 0x2000003 ;
1930- constexpr uint32_t transformMask = Transformed ? 1 << 7 : 0 ;
1931- constexpr uint32_t transposeMask = Transposed ? 1 << 15 : 0 ;
1932- constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9 ;
1933- __ESIMD_NS::simd<T, N> oldDst;
1934- constexpr uint32_t exDesc = 0x0 ;
1935- constexpr uint32_t desc = base_desc | cache_mask | transformMask |
1936- transposeMask | dataSizeMask | DstLengthMask;
1937- constexpr uint8_t execSize = 1 ;
1938- constexpr uint8_t sfid = 0xF ;
1939- constexpr uint8_t numSrc0 = 0x1 ;
1940- constexpr uint8_t numDst = (N * sizeof (T)) / 64 ;
1941- __ESIMD_NS::simd<T, ActualN> Raw =
1942- __ESIMD_NS::raw_send<execSize, sfid, numSrc0, numDst>(
1943- oldDst, payload.get_raw_data (), exDesc, desc);
1928+ __ESIMD_NS::simd<RawT, N> oldDst;
1929+ constexpr uint16_t Mask = 1 ;
1930+ constexpr CacheVectorT Cache = {static_cast <uint8_t >(L1H),
1931+ static_cast <uint8_t >(L2H)};
1932+
1933+ __ESIMD_NS::simd<T, ActualN> Raw;
1934+
1935+ if constexpr (Transposed)
1936+ Raw = __esimd_lsc_load2d_descriptor_transpose<RawT, NBlocks, BlockWidth,
1937+ BlockHeight, 0 , 0 , N>(
1938+ Mask, payload.get_raw_data ().data (), oldDst.data (), Cache);
1939+ else if constexpr (Transformed)
1940+ Raw = __esimd_lsc_load2d_descriptor_transform<RawT, NBlocks, BlockWidth,
1941+ BlockHeight, 0 , 0 , N>(
1942+ Mask, payload.get_raw_data ().data (), oldDst.data (), Cache);
1943+ else
1944+ Raw = __esimd_lsc_load2d_descriptor<RawT, NBlocks, BlockWidth, BlockHeight,
1945+ 0 , 0 , N>(
1946+ Mask, payload.get_raw_data ().data (), oldDst.data (), Cache);
19441947
19451948 if constexpr (ActualN == N) {
19461949 return Raw;
@@ -1988,27 +1991,24 @@ template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
19881991ESIMD_INLINE SYCL_ESIMD_FUNCTION void lsc_prefetch_2d (
19891992 config_2d_mem_access<T, BlockWidth, BlockHeight, NBlocks> &payload) {
19901993 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
1994+ using CacheVectorT = __ESIMD_DNS::vector_type_t <uint8_t , 2 >;
1995+ using RawT = __ESIMD_DNS::__raw_t <T>;
19911996 __ESIMD_DNS::check_cache_hints<__ESIMD_DNS::cache_action::load,
19921997 PropertyListT>();
19931998 __ESIMD_DNS::check_lsc_block_2d_restrictions<
1994- T , BlockWidth, BlockHeight, NBlocks, Transposed, Transformed,
1999+ RawT , BlockWidth, BlockHeight, NBlocks, Transposed, Transformed,
19952000 __ESIMD_DNS::block_2d_op::prefetch>();
19962001 static_assert (!Transposed || !Transformed,
19972002 " Transposed and transformed is not supported" );
1998- constexpr uint32_t cache_mask = detail::get_lsc_load_cache_mask<L1H, L2H>()
1999- << 17 ;
2000- constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9 ;
2001- constexpr uint32_t base_desc = 0x2000003 ;
2002- constexpr uint32_t transformMask = Transformed ? 1 << 7 : 0 ;
2003- constexpr uint32_t transposeMask = Transposed ? 1 << 15 : 0 ;
2004- constexpr uint32_t exDesc = 0x0 ;
2005- constexpr uint32_t desc =
2006- base_desc | cache_mask | transformMask | transposeMask | dataSizeMask;
2007- constexpr uint8_t execSize = 1 ;
2008- constexpr uint8_t sfid = 0xF ;
2009- constexpr uint8_t numDst = (N * sizeof (T)) / 64 ;
2010- __ESIMD_NS::raw_send<execSize, sfid, numDst>(payload.get_raw_data (), exDesc,
2011- desc);
2003+
2004+ __ESIMD_NS::simd<RawT, N> oldDst;
2005+ constexpr uint16_t Mask = 1 ;
2006+ constexpr CacheVectorT Cache = {static_cast <uint8_t >(L1H),
2007+ static_cast <uint8_t >(L2H)};
2008+
2009+ __esimd_lsc_prefetch_descriptor<RawT, NBlocks, BlockWidth, BlockHeight, 0 , 0 ,
2010+ N>(Mask, payload.get_raw_data ().data (),
2011+ oldDst.data (), Cache);
20122012}
20132013
20142014// / A variation of \c 2D stateless block store \c with parameters passed as
@@ -2033,27 +2033,21 @@ template <typename T, int BlockWidth, int BlockHeight = 1, int NBlocks = 1,
20332033ESIMD_INLINE SYCL_ESIMD_FUNCTION void
20342034lsc_store_2d (config_2d_mem_access<T, BlockWidth, BlockHeight, NBlocks> &payload,
20352035 __ESIMD_NS::simd<T, N> Data) {
2036+ using RawT = __ESIMD_DNS::__raw_t <T>;
20362037 __ESIMD_DNS::check_lsc_block_2d_restrictions<
2037- T , BlockWidth, BlockHeight, NBlocks, false , false ,
2038+ RawT , BlockWidth, BlockHeight, NBlocks, false , false ,
20382039 __ESIMD_DNS::block_2d_op::store>();
20392040 using PropertyListT = __ESIMD_DNS::make_L1_L2_properties_t<L1H, L2H>;
2041+ using CacheVectorT = __ESIMD_DNS::vector_type_t <uint8_t , 2 >;
20402042 __ESIMD_DNS::check_cache_hints<__ESIMD_DNS::cache_action::store,
20412043 PropertyListT>();
20422044
2043- constexpr uint32_t cache_mask = detail::get_lsc_store_cache_mask<L1H, L2H>()
2044- << 17 ;
2045- constexpr uint32_t dataSizeMask = detail::get_lsc_data_size<T>() << 9 ;
2046- constexpr uint32_t base_desc = 0x2000007 ;
2047-
2048- constexpr uint32_t exDesc = 0x0 ;
2049- constexpr uint32_t desc = base_desc | cache_mask | dataSizeMask;
2050- constexpr uint8_t execSize = 1 ;
2051- constexpr uint8_t sfid = 0xF ;
2052- constexpr uint8_t numSrc0 = 0x1 ;
2053- constexpr uint8_t numSrc1 = (N * sizeof (T)) / 64 ;
2045+ constexpr uint16_t Mask = 1 ;
2046+ constexpr CacheVectorT Cache = {static_cast <uint8_t >(L1H),
2047+ static_cast <uint8_t >(L2H)};
20542048
2055- __ESIMD_NS::raw_sends<execSize, sfid, numSrc0, numSrc1 >(
2056- payload.get_raw_data (), Data, exDesc, desc );
2049+ __esimd_lsc_store_descriptor<RawT, NBlocks, BlockWidth, BlockHeight, 0 , 0 , N >(
2050+ Mask, payload.get_raw_data (). data () , Data. data (), Cache );
20572051}
20582052
20592053namespace detail {
0 commit comments