@@ -1016,7 +1016,7 @@ DEFN_INTEL_SUB_GROUP_BLOCK_WRITE_LSC_CACHEOPTS(intel_subgroup_block_write_cacheo
10161016// To support a new SPV_INTEL_2d_block_io operation that has no matching cl_intel_subgroup_2d_block_io built-in,
10171017// use the DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_ macros.
10181018
1019- #define DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_READ (FUNC_NAME , INTERNAL_DST_TYPE , INTERNAL_FUNC ) \
1019+ #define DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_READ_CACHE_CONTROLS (FUNC_NAME , INTERNAL_DST_TYPE , INTERNAL_FUNC ) \
10201020INLINE void __internal_##FUNC_NAME##_cache_controls(__global void* base_address, int width, int height, int pitch, int2 coord, __private void* destination, enum LSC_LDCC cache_controls) \
10211021{ \
10221022 long baseoffset = as_long(base_address); \
@@ -1027,11 +1027,21 @@ INLINE void __internal_##FUNC_NAME##_cache_controls(__global void* base_address,
10271027 *(__private INTERNAL_DST_TYPE*)destination = ret; \
10281028}
10291029
// Emits two internal read helpers by expanding
// DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_READ_CACHE_CONTROLS twice:
//   1. __internal_<FUNC_NAME>_cache_controls, using INTERNAL_DST_TYPE and INTERNAL_FUNC;
//   2. __internal_<FUNC_NAME>_sg32_cache_controls, using HALVE_TYPE(INTERNAL_DST_TYPE)
//      and the built-in named <INTERNAL_FUNC>_sg32.
// Because the second expansion pastes "_sg32" onto INTERNAL_FUNC, every
// instantiation requires a built-in with that suffixed name to be declared.
// NOTE(review): HALVE_TYPE is defined outside this excerpt; presumably it maps a
// vector type to the type with half as many elements - confirm.
1030+ #define DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_READ (FUNC_NAME , INTERNAL_DST_TYPE , INTERNAL_FUNC ) \
1031+ DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_READ_CACHE_CONTROLS(FUNC_NAME, INTERNAL_DST_TYPE, INTERNAL_FUNC) \
1032+ DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_READ_CACHE_CONTROLS(FUNC_NAME##_sg32, HALVE_TYPE(INTERNAL_DST_TYPE), INTERNAL_FUNC##_sg32)
1033+
1034+ // The same 2D block dimensions use a different data type per work item
1035+ // depending on the subgroup size, so a separate function is defined for each variant.
//
// Expands to the internal helpers (via DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_READ)
// plus two overloadable entry points:
//   FUNC_NAME        -> forwards to __internal_<FUNC_NAME>_cache_controls
//   FUNC_NAME##_sg32 -> forwards to __internal_<FUNC_NAME>_sg32_cache_controls
// Both entry points take the same arguments (base_address, width, height, pitch,
// coord, destination), pass them through unchanged with destination cast to
// __private void*, and select LSC_LDCC_DEFAULT as the cache control.
// Both also declare destination as __private DST_PTR_TYPE*; the _sg32 internal
// helper is instantiated with HALVE_TYPE(INTERNAL_DST_TYPE) rather than
// INTERNAL_DST_TYPE.
10301036#define DEFN_INTEL_SUB_GROUP_2D_BLOCK_READ (FUNC_NAME , DST_PTR_TYPE , INTERNAL_DST_TYPE , INTERNAL_FUNC ) \
10311037DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_READ(FUNC_NAME, INTERNAL_DST_TYPE, INTERNAL_FUNC) \
10321038INLINE void OVERLOADABLE FUNC_NAME(__global void* base_address, int width, int height, int pitch, int2 coord, __private DST_PTR_TYPE* destination) \
10331039{ \
10341040 __internal_##FUNC_NAME##_cache_controls(base_address, width, height, pitch, coord, (__private void *)destination, LSC_LDCC_DEFAULT); \
1041+ } \
1042+ INLINE void OVERLOADABLE FUNC_NAME##_sg32(__global void* base_address, int width, int height, int pitch, int2 coord, __private DST_PTR_TYPE* destination) \
1043+ { \
1044+ __internal_##FUNC_NAME##_sg32_cache_controls(base_address, width, height, pitch, coord, (__private void *)destination, LSC_LDCC_DEFAULT); \
10351045}
10361046
10371047// type d8, block width 16, array length 1
@@ -1237,7 +1247,7 @@ DEFN_INTEL_SUB_GROUP_2D_BLOCK_PREFETCH(intel_sub_group_2d_block_prefetch_32b_32r
12371247DEFN_INTEL_SUB_GROUP_2D_BLOCK_PREFETCH (intel_sub_group_2d_block_prefetch_8b_32r16x2c , __builtin_IB_subgroup_block_read_prefetch_u8_m32k16v2 )
12381248DEFN_INTEL_SUB_GROUP_2D_BLOCK_PREFETCH (intel_sub_group_2d_block_prefetch_8b_32r16x4c , __builtin_IB_subgroup_block_read_prefetch_u8_m32k16v4 )
12391249
1240- #define DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_WRITE (FUNC_NAME , INTERNAL_DST_TYPE , INTERNAL_FUNC ) \
1250+ #define DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_WRITE_CACHE_CONTROLS (FUNC_NAME , INTERNAL_DST_TYPE , INTERNAL_FUNC ) \
12411251INLINE void __internal_##FUNC_NAME##_cache_controls(__global void* base_address, int width, int height, int pitch, int2 coord, private void* val, enum LSC_LDCC cache_controls) \
12421252{ \
12431253 long baseoffset = as_long(base_address); \
@@ -1247,11 +1257,21 @@ INLINE void __internal_##FUNC_NAME##_cache_controls(__global void* base_address,
12471257 INTERNAL_FUNC(baseoffset, width_minus_one, height_minus_one, pitch_minus_one, coord, *(private INTERNAL_DST_TYPE*)val, cache_controls); \
12481258}
12491259
// Emits two internal write helpers by expanding
// DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_WRITE_CACHE_CONTROLS twice:
//   1. __internal_<FUNC_NAME>_cache_controls, using INTERNAL_DST_TYPE and INTERNAL_FUNC;
//   2. __internal_<FUNC_NAME>_sg32_cache_controls, using HALVE_TYPE(INTERNAL_DST_TYPE)
//      and the built-in named <INTERNAL_FUNC>_sg32.
// Because the second expansion pastes "_sg32" onto INTERNAL_FUNC, every
// instantiation requires a built-in with that suffixed name to be declared.
// NOTE(review): HALVE_TYPE is defined outside this excerpt; presumably it maps a
// vector type to the type with half as many elements - confirm.
1260+ #define DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_WRITE (FUNC_NAME , INTERNAL_DST_TYPE , INTERNAL_FUNC ) \
1261+ DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_WRITE_CACHE_CONTROLS(FUNC_NAME, INTERNAL_DST_TYPE, INTERNAL_FUNC) \
1262+ DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_WRITE_CACHE_CONTROLS(FUNC_NAME##_sg32, HALVE_TYPE(INTERNAL_DST_TYPE), INTERNAL_FUNC##_sg32)
1263+
1264+ // The same 2D block dimensions use a different data type per work item
1265+ // depending on the subgroup size, so a separate function is defined for each variant.
//
// Expands to the internal helpers (via DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_WRITE)
// plus two overloadable entry points:
//   FUNC_NAME        -> forwards to __internal_<FUNC_NAME>_cache_controls
//   FUNC_NAME##_sg32 -> forwards to __internal_<FUNC_NAME>_sg32_cache_controls
// Both entry points take the same arguments (base_address, width, height, pitch,
// coord, val), pass them through unchanged with val cast to private void*, and
// select LSC_LDCC_DEFAULT as the cache control.
// Both also declare val as private DST_PTR_TYPE*; the _sg32 internal helper is
// instantiated with HALVE_TYPE(INTERNAL_DST_TYPE) rather than INTERNAL_DST_TYPE.
// NOTE(review): this write path passes LSC_LDCC_DEFAULT, the same constant the
// read macros use - confirm that a store-specific cache-control value is not
// expected here.
12501266#define DEFN_INTEL_SUB_GROUP_2D_BLOCK_WRITE (FUNC_NAME , DST_PTR_TYPE , INTERNAL_DST_TYPE , INTERNAL_FUNC ) \
12511267DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_WRITE(FUNC_NAME, INTERNAL_DST_TYPE, INTERNAL_FUNC) \
12521268INLINE void OVERLOADABLE FUNC_NAME(__global void* base_address, int width, int height, int pitch, int2 coord, private DST_PTR_TYPE* val) \
12531269{ \
12541270 __internal_##FUNC_NAME##_cache_controls(base_address, width, height, pitch, coord, (private void*) val, LSC_LDCC_DEFAULT); \
1271+ } \
1272+ INLINE void OVERLOADABLE FUNC_NAME##_sg32(__global void* base_address, int width, int height, int pitch, int2 coord, private DST_PTR_TYPE* val) \
1273+ { \
1274+ __internal_##FUNC_NAME##_sg32_cache_controls(base_address, width, height, pitch, coord, (private void*) val, LSC_LDCC_DEFAULT); \
12551275}
12561276
12571277DEFN_INTEL_SUB_GROUP_2D_BLOCK_WRITE (intel_sub_group_2d_block_write_8b_1r32x1c , ushort , ushort , __builtin_IB_subgroup_block_write_cacheopts_u8_m1k32v1 )
0 commit comments