@@ -37,13 +37,14 @@ INLINE TYPE_RET OVERLOADABLE dot_acc_sat(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_RET acc)
3737#define DEFN_INTEL_DOT_PRODUCT_BUILTIN_SPIRV (TYPE_RET , TYPE_ARG1 , TYPE_ARG2 , TYPE_SUFFIX , MANGLING_OLD , MANGLING_NEW , TYPE_SUFFIX_IB ) \
3838TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b) \
3939{ \
40- return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b), false); \
40+ return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b)); \
4141}
4242
43- #define DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (TYPE_RET , TYPE_ARG1 , TYPE_ARG2 , TYPE_SUFFIX , MANGLING_OLD , MANGLING_NEW , TYPE_SUFFIX_IB ) \
44- TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotAccSatKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_RET acc) \
45- { \
46- return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(acc, as_int(a), as_int(b), true); \
43+ #define DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (TYPE_RET , TYPE_ARG1 , TYPE_ARG2 , TYPE_SUFFIX , MANGLING_OLD , MANGLING_NEW , TYPE_SUFFIX_IB , SAT_PREFIX ) \
44+ TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotAccSatKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_RET acc) \
45+ { \
46+ TYPE_RET product = __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b)); \
47+ return SPIRV_OCL_BUILTIN(SAT_PREFIX##_add_sat, _i32_i32,)(product, acc); \
4748}
4849
4950#define DEFN_INTEL_DOT_PRODUCT_US (TYPE_RET , TYPE_ARG , MANGLING_OLD , MANGLING_NEW ) \
@@ -61,13 +62,14 @@ INLINE TYPE_RET OVERLOADABLE dot_acc_sat(u##TYPE_ARG a, TYPE_ARG b, TYPE_RET acc
6162#define DEFN_INTEL_DOT_PRODUCT_PACKED_BUILTIN_SPIRV (TYPE_RET , TYPE_ARG1 , TYPE_ARG2 , TYPE_SUFFIX , MANGLING_OLD , MANGLING_NEW , TYPE_SUFFIX_IB ) \
6263TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_ARG1 packed) \
6364{ \
64- return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b), false); \
65+ return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b)); \
6566}
6667
67- #define DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV (TYPE_RET , TYPE_ARG1 , TYPE_ARG2 , TYPE_SUFFIX , MANGLING_OLD , MANGLING_NEW , TYPE_SUFFIX_IB ) \
68- TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotAccSatKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_RET acc, TYPE_ARG1 packed) \
69- { \
70- return __builtin_IB_dp4a_##TYPE_SUFFIX_IB(acc, as_int(a), as_int(b), true); \
68+ #define DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV (TYPE_RET , TYPE_ARG1 , TYPE_ARG2 , TYPE_SUFFIX , MANGLING_OLD , MANGLING_NEW , TYPE_SUFFIX_IB , SAT_PREFIX ) \
69+ TYPE_RET SPIRV_OVERLOADABLE SPIRV_BUILTIN(TYPE_SUFFIX##DotAccSatKHR, MANGLING_OLD, MANGLING_NEW)(TYPE_ARG1 a, TYPE_ARG2 b, TYPE_RET acc, TYPE_ARG1 packed) \
70+ { \
71+ TYPE_RET product = __builtin_IB_dp4a_##TYPE_SUFFIX_IB(0, as_int(a), as_int(b)); \
72+ return SPIRV_OCL_BUILTIN(SAT_PREFIX##_add_sat, _i32_i32,)(product, acc); \
7173}
7274
7375#define DEFN_INTEL_DOT_PRODUCT_PACKED (TYPE_RET , ARG_TYPES , TYPE_SUFFIX , MANGLING_OLD , MANGLING_NEW ) \
@@ -103,12 +105,12 @@ DEFN_INTEL_DOT_PRODUCT_PACKED_BUILTIN_SPIRV(uint, uint, uint, U, _i32_i32_i32, _
103105DEFN_INTEL_DOT_PRODUCT_PACKED (uint , uu , U , _i32_i32_i32 , _Ruint )
104106#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
105107#ifdef __opencl_c_integer_dot_product_saturation_accumulation
106- DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (uint , uchar4 , uchar4 , U , _v4i8_v4i8_i32 , _Ruint , uu )
108+ DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (uint , uchar4 , uchar4 , U , _v4i8_v4i8_i32 , _Ruint , uu , u )
107109DEFN_INTEL_DOT_PRODUCT_SAT (uint , uchar4 , uchar4 , U , _v4i8_v4i8_i32 , _Ruint )
108- DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (uint , ushort2 , ushort2 , U , _v2i16_v2i16_i32 , _Ruint , uu )
110+ DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (uint , ushort2 , ushort2 , U , _v2i16_v2i16_i32 , _Ruint , uu , u )
109111DEFN_INTEL_DOT_PRODUCT_SAT (uint , ushort2 , ushort2 , U , _v2i16_v2i16_i32 , _Ruint )
110112#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
111- DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV (uint , uint , uint , U , _i32_i32_i32_i32 , _Ruint , uu )
113+ DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV (uint , uint , uint , U , _i32_i32_i32_i32 , _Ruint , uu , u )
112114DEFN_INTEL_DOT_PRODUCT_SAT_PACKED (uint , U , uu , _i32_i32_i32_i32 , _Ruint )
113115#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
114116#endif // __opencl_c_integer_dot_product_saturation_accumulation
@@ -123,12 +125,12 @@ DEFN_INTEL_DOT_PRODUCT_PACKED_BUILTIN_SPIRV(int, int, int, S, _i32_i32_i32, _Rin
123125DEFN_INTEL_DOT_PRODUCT_PACKED (int , ss , S , _i32_i32_i32 , _Rint )
124126#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
125127#ifdef __opencl_c_integer_dot_product_saturation_accumulation
126- DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (int , char4 , char4 , S , _v4i8_v4i8_i32 , _Rint , ss )
128+ DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (int , char4 , char4 , S , _v4i8_v4i8_i32 , _Rint , ss , s )
127129DEFN_INTEL_DOT_PRODUCT_SAT (int , char4 , char4 , S , _v4i8_v4i8_i32 , _Rint )
128- DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (int , short2 , short2 , S , _v2i16_v2i16_i32 , _Rint , ss )
130+ DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (int , short2 , short2 , S , _v2i16_v2i16_i32 , _Rint , ss , s )
129131DEFN_INTEL_DOT_PRODUCT_SAT (int , short2 , short2 , S , _v2i16_v2i16_i32 , _Rint )
130132#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
131- DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV (int , int , int , S , _i32_i32_i32_i32 , _Rint , ss )
133+ DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV (int , int , int , S , _i32_i32_i32_i32 , _Rint , ss , s )
132134DEFN_INTEL_DOT_PRODUCT_SAT_PACKED (int , S , ss , _i32_i32_i32_i32 , _Rint )
133135#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
134136#endif // __opencl_c_integer_dot_product_saturation_accumulation
@@ -147,14 +149,14 @@ DEFN_INTEL_DOT_PRODUCT_PACKED(int, su, SU, _i32_i32_i32, _Rint)
147149DEFN_INTEL_DOT_PRODUCT_PACKED_US
148150#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
149151#ifdef __opencl_c_integer_dot_product_saturation_accumulation
150- DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (int , char4 , uchar4 , SU , _v4i8_v4i8_i32 , _Rint , su )
152+ DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (int , char4 , uchar4 , SU , _v4i8_v4i8_i32 , _Rint , su , s )
151153DEFN_INTEL_DOT_PRODUCT_SAT (int , char4 , uchar4 , SU , _v4i8_v4i8_i32 , _Rint )
152154DEFN_INTEL_DOT_PRODUCT_SAT_US (int , char4 , _v4i8_v4i8_i32 , _Rint )
153- DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (int , short2 , ushort2 , SU , _v2i16_v2i16_i32 , _Rint , su )
155+ DEFN_INTEL_DOT_PRODUCT_SAT_BUILTIN_SPIRV (int , short2 , ushort2 , SU , _v2i16_v2i16_i32 , _Rint , su , s )
154156DEFN_INTEL_DOT_PRODUCT_SAT (int , short2 , ushort2 , SU , _v2i16_v2i16_i32 , _Rint )
155157DEFN_INTEL_DOT_PRODUCT_SAT_US (int , short2 , _v2i16_v2i16_i32 , _Rint )
156158#ifdef __opencl_c_integer_dot_product_input_4x8bit_packed
157- DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV (int , int , uint , SU , _i32_i32_i32_i32 , _Rint , su )
159+ DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_BUILTIN_SPIRV (int , int , uint , SU , _i32_i32_i32_i32 , _Rint , su , s )
158160DEFN_INTEL_DOT_PRODUCT_SAT_PACKED (int , SU , su , _i32_i32_i32_i32 , _Rint )
159161DEFN_INTEL_DOT_PRODUCT_SAT_PACKED_US
160162#endif // __opencl_c_integer_dot_product_input_4x8bit_packed
0 commit comments