@@ -1016,7 +1016,7 @@ DEFN_INTEL_SUB_GROUP_BLOCK_WRITE_LSC_CACHEOPTS(intel_subgroup_block_write_cacheo
1016
1016
// To support SPV_INTEL_2d_block_io operations that have no matching cl_intel_subgroup_2d_block_io built-in, use the
1017
1017
// DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_ macros
1018
1018
1019
- #define DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_READ (FUNC_NAME , INTERNAL_DST_TYPE , INTERNAL_FUNC ) \
1019
// Defines __internal_<FUNC_NAME>_cache_controls, a 2D block read helper that takes
// an explicit cache_controls argument. The base address is converted to a long
// with as_long(), and the value `ret` is written through `destination`, which is
// reinterpreted as a __private pointer to INTERNAL_DST_TYPE.
// NOTE(review): the lines between the two hunks are not shown in this diff, so how
// `ret` is produced (presumably by calling INTERNAL_FUNC) must be confirmed in the full file.
+ #define DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_READ_CACHE_CONTROLS (FUNC_NAME , INTERNAL_DST_TYPE , INTERNAL_FUNC ) \
1020
1020
INLINE void __internal_##FUNC_NAME##_cache_controls(__global void* base_address, int width, int height, int pitch, int2 coord, __private void* destination, enum LSC_LDCC cache_controls) \
1021
1021
{ \
1022
1022
long baseoffset = as_long(base_address); \
@@ -1027,11 +1027,21 @@ INLINE void __internal_##FUNC_NAME##_cache_controls(__global void* base_address,
1027
1027
*(__private INTERNAL_DST_TYPE*)destination = ret; \
1028
1028
}
1029
1029
1030
// Emits two internal cache-controls read helpers from a single invocation:
//   1. FUNC_NAME, using INTERNAL_DST_TYPE and INTERNAL_FUNC;
//   2. FUNC_NAME##_sg32, using HALVE_TYPE(INTERNAL_DST_TYPE) and INTERNAL_FUNC##_sg32.
// NOTE(review): HALVE_TYPE and the *_sg32 built-ins are defined outside this diff;
// presumably the _sg32 variant targets subgroup size 32 -- confirm.
+ #define DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_READ (FUNC_NAME , INTERNAL_DST_TYPE , INTERNAL_FUNC ) \
1031
+ DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_READ_CACHE_CONTROLS(FUNC_NAME, INTERNAL_DST_TYPE, INTERNAL_FUNC) \
1032
+ DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_READ_CACHE_CONTROLS(FUNC_NAME##_sg32, HALVE_TYPE(INTERNAL_DST_TYPE), INTERNAL_FUNC##_sg32)
1033
+
1034
+ // The same 2D block dimensions use a different data type per work item
1035
+ // depending on the subgroup size. Define unique functions for each variant.
1030
1036
// Defines the public overloadable built-in FUNC_NAME and its FUNC_NAME##_sg32
// counterpart. Both take a __private DST_PTR_TYPE* destination, cast it to
// __private void*, and forward to the matching __internal_..._cache_controls
// helper (generated via DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_READ) with
// LSC_LDCC_DEFAULT as the cache-control value.
#define DEFN_INTEL_SUB_GROUP_2D_BLOCK_READ (FUNC_NAME , DST_PTR_TYPE , INTERNAL_DST_TYPE , INTERNAL_FUNC ) \
1031
1037
DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_READ(FUNC_NAME, INTERNAL_DST_TYPE, INTERNAL_FUNC) \
1032
1038
INLINE void OVERLOADABLE FUNC_NAME(__global void* base_address, int width, int height, int pitch, int2 coord, __private DST_PTR_TYPE* destination) \
1033
1039
{ \
1034
1040
__internal_##FUNC_NAME##_cache_controls(base_address, width, height, pitch, coord, (__private void *)destination, LSC_LDCC_DEFAULT); \
1041
+ } \
1042
+ INLINE void OVERLOADABLE FUNC_NAME##_sg32(__global void* base_address, int width, int height, int pitch, int2 coord, __private DST_PTR_TYPE* destination) \
1043
+ { \
1044
+ __internal_##FUNC_NAME##_sg32_cache_controls(base_address, width, height, pitch, coord, (__private void *)destination, LSC_LDCC_DEFAULT); \
1035
1045
}
1036
1046
1037
1047
// type d8, block width 16, array length 1
@@ -1237,7 +1247,7 @@ DEFN_INTEL_SUB_GROUP_2D_BLOCK_PREFETCH(intel_sub_group_2d_block_prefetch_32b_32r
1237
1247
// Instantiate the intel_sub_group_2d_block_prefetch_8b_32r16x2c and ..._32r16x4c
// built-ins, backed by __builtin_IB_subgroup_block_read_prefetch_u8_m32k16v2 and
// ..._m32k16v4 respectively. DEFN_INTEL_SUB_GROUP_2D_BLOCK_PREFETCH itself is
// defined outside this excerpt.
DEFN_INTEL_SUB_GROUP_2D_BLOCK_PREFETCH (intel_sub_group_2d_block_prefetch_8b_32r16x2c , __builtin_IB_subgroup_block_read_prefetch_u8_m32k16v2 )
1238
1248
DEFN_INTEL_SUB_GROUP_2D_BLOCK_PREFETCH (intel_sub_group_2d_block_prefetch_8b_32r16x4c , __builtin_IB_subgroup_block_read_prefetch_u8_m32k16v4 )
1239
1249
1240
- #define DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_WRITE (FUNC_NAME , INTERNAL_DST_TYPE , INTERNAL_FUNC ) \
1250
// Defines __internal_<FUNC_NAME>_cache_controls for 2D block writes: the value
// behind `val` is read as INTERNAL_DST_TYPE and passed to INTERNAL_FUNC together
// with the base address (converted to a long with as_long()), the coordinate and
// cache_controls.
// NOTE(review): width_minus_one / height_minus_one / pitch_minus_one are computed
// in lines this diff does not show -- presumably each input minus one; confirm in
// the full file.
// NOTE(review): cache_controls is typed enum LSC_LDCC, the same type the read
// helper uses -- confirm that is intended for the write path.
+ #define DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_WRITE_CACHE_CONTROLS (FUNC_NAME , INTERNAL_DST_TYPE , INTERNAL_FUNC ) \
1241
1251
INLINE void __internal_##FUNC_NAME##_cache_controls(__global void* base_address, int width, int height, int pitch, int2 coord, private void* val, enum LSC_LDCC cache_controls) \
1242
1252
{ \
1243
1253
long baseoffset = as_long(base_address); \
@@ -1247,11 +1257,21 @@ INLINE void __internal_##FUNC_NAME##_cache_controls(__global void* base_address,
1247
1257
INTERNAL_FUNC(baseoffset, width_minus_one, height_minus_one, pitch_minus_one, coord, *(private INTERNAL_DST_TYPE*)val, cache_controls); \
1248
1258
}
1249
1259
1260
// Emits two internal cache-controls write helpers from a single invocation:
//   1. FUNC_NAME, using INTERNAL_DST_TYPE and INTERNAL_FUNC;
//   2. FUNC_NAME##_sg32, using HALVE_TYPE(INTERNAL_DST_TYPE) and INTERNAL_FUNC##_sg32.
// NOTE(review): HALVE_TYPE and the *_sg32 built-ins are defined outside this diff;
// presumably the _sg32 variant targets subgroup size 32 -- confirm.
+ #define DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_WRITE (FUNC_NAME , INTERNAL_DST_TYPE , INTERNAL_FUNC ) \
1261
+ DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_WRITE_CACHE_CONTROLS(FUNC_NAME, INTERNAL_DST_TYPE, INTERNAL_FUNC) \
1262
+ DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_WRITE_CACHE_CONTROLS(FUNC_NAME##_sg32, HALVE_TYPE(INTERNAL_DST_TYPE), INTERNAL_FUNC##_sg32)
1263
+
1264
+ // The same 2D block dimensions use a different data type per work item
1265
+ // depending on the subgroup size. Define unique functions for each variant.
1250
1266
// Defines the public overloadable built-in FUNC_NAME and its FUNC_NAME##_sg32
// counterpart for 2D block writes. Both take a private DST_PTR_TYPE* val, cast it
// to private void*, and forward to the matching __internal_..._cache_controls
// helper (generated via DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_WRITE) with
// LSC_LDCC_DEFAULT as the cache-control value.
#define DEFN_INTEL_SUB_GROUP_2D_BLOCK_WRITE (FUNC_NAME , DST_PTR_TYPE , INTERNAL_DST_TYPE , INTERNAL_FUNC ) \
1251
1267
DEFN_INTERNAL_INTEL_SUB_GROUP_2D_BLOCK_WRITE(FUNC_NAME, INTERNAL_DST_TYPE, INTERNAL_FUNC) \
1252
1268
INLINE void OVERLOADABLE FUNC_NAME(__global void* base_address, int width, int height, int pitch, int2 coord, private DST_PTR_TYPE* val) \
1253
1269
{ \
1254
1270
__internal_##FUNC_NAME##_cache_controls(base_address, width, height, pitch, coord, (private void*) val, LSC_LDCC_DEFAULT); \
1271
+ } \
1272
+ INLINE void OVERLOADABLE FUNC_NAME##_sg32(__global void* base_address, int width, int height, int pitch, int2 coord, private DST_PTR_TYPE* val) \
1273
+ { \
1274
+ __internal_##FUNC_NAME##_sg32_cache_controls(base_address, width, height, pitch, coord, (private void*) val, LSC_LDCC_DEFAULT); \
1255
1275
}
1256
1276
1257
1277
// Instantiates intel_sub_group_2d_block_write_8b_1r32x1c with ushort as both the
// pointer type and the internal type. Through DEFN_INTEL_SUB_GROUP_2D_BLOCK_WRITE
// this also emits the _sg32 overload, which uses HALVE_TYPE(ushort) internally and
// calls __builtin_IB_subgroup_block_write_cacheopts_u8_m1k32v1_sg32.
DEFN_INTEL_SUB_GROUP_2D_BLOCK_WRITE (intel_sub_group_2d_block_write_8b_1r32x1c , ushort , ushort , __builtin_IB_subgroup_block_write_cacheopts_u8_m1k32v1 )
0 commit comments