libclc/libspirv/lib/amdgcn-amdhsa/misc/sub_group_shuffle.cl (100 changes: 50 additions & 50 deletions)
@@ -52,14 +52,14 @@ __AMDGCN_CLC_SUBGROUP_SUB_I32(unsigned short, t);
 
 #ifdef cl_khr_fp16
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
-_CLC_OVERLOAD _CLC_DEF half
-__spirv_SubgroupShuffleINTEL(half Data, unsigned int InvocationId) {
+_CLC_OVERLOAD _CLC_DEF __clc_float16_t
+__spirv_SubgroupShuffleINTEL(__clc_float16_t Data, unsigned int InvocationId) {
   unsigned short tmp = __clc_as_ushort(Data);
   tmp = __spirv_SubgroupShuffleINTEL(tmp, InvocationId);
   return __clc_as_half(tmp);
 }
-_CLC_DEF half _Z28__spirv_SubgroupShuffleINTELIDF16_ET_S0_j(
-    half Data, unsigned int InvocationId) {
+_CLC_DEF __clc_float16_t _Z28__spirv_SubgroupShuffleINTELIDF16_ET_S0_j(
+    __clc_float16_t Data, unsigned int InvocationId) {
   return __spirv_SubgroupShuffleINTEL(Data, InvocationId);
 }
 #endif // cl_khr_fp16
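The scalar fp16 overload above never shuffles a half directly: it reinterprets the value as an unsigned short, shuffles the integer, and reinterprets back. A minimal sketch of that round trip, written against the standard OpenCL as_ushort/as_half builtins rather than the internal __clc_* wrappers (shuffle_half_sketch is a hypothetical name, not part of the patch):

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

// Hypothetical illustration of the bitcast round trip used above.
half shuffle_half_sketch(half v, uint lane) {
  ushort bits = as_ushort(v);                      // reinterpret the 16 bits, no conversion
  bits = __spirv_SubgroupShuffleINTEL(bits, lane); // shuffle as a 16-bit integer
  return as_half(bits);                            // same bits back as fp16
}

Both casts are pure bit reinterpretations, so the round trip is lossless for every fp16 value, including NaNs and denormals.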
@@ -165,10 +165,10 @@ __AMDGCN_CLC_SUBGROUP_TO_VEC(ulong8, 8)
 __AMDGCN_CLC_SUBGROUP_TO_VEC(ulong16, 16)
 // half
 #ifdef cl_khr_fp16
-__AMDGCN_CLC_SUBGROUP_TO_VEC(half2, 2)
-__AMDGCN_CLC_SUBGROUP_TO_VEC(half4, 4)
-__AMDGCN_CLC_SUBGROUP_TO_VEC(half8, 8)
-__AMDGCN_CLC_SUBGROUP_TO_VEC(half16, 16)
+__AMDGCN_CLC_SUBGROUP_TO_VEC(__clc_vec2_float16_t, 2)
+__AMDGCN_CLC_SUBGROUP_TO_VEC(__clc_vec4_float16_t, 4)
+__AMDGCN_CLC_SUBGROUP_TO_VEC(__clc_vec8_float16_t, 8)
+__AMDGCN_CLC_SUBGROUP_TO_VEC(__clc_vec16_float16_t, 16)
 #endif // cl_khr_fp16
 // float
 __AMDGCN_CLC_SUBGROUP_TO_VEC(float2, 2)
@@ -227,10 +227,10 @@ __AMDGCN_CLC_SUBGROUP_TO_VEC(ulong8, m, 8)
 __AMDGCN_CLC_SUBGROUP_TO_VEC(ulong16, m, 16)
 // half
 #ifdef cl_khr_fp16
-__AMDGCN_CLC_SUBGROUP_TO_VEC(half2, DF16_, 2)
-__AMDGCN_CLC_SUBGROUP_TO_VEC(half4, DF16_, 4)
-__AMDGCN_CLC_SUBGROUP_TO_VEC(half8, DF16_, 8)
-__AMDGCN_CLC_SUBGROUP_TO_VEC(half16, DF16_, 16)
+__AMDGCN_CLC_SUBGROUP_TO_VEC(__clc_vec2_float16_t, DF16_, 2)
+__AMDGCN_CLC_SUBGROUP_TO_VEC(__clc_vec4_float16_t, DF16_, 4)
+__AMDGCN_CLC_SUBGROUP_TO_VEC(__clc_vec8_float16_t, DF16_, 8)
+__AMDGCN_CLC_SUBGROUP_TO_VEC(__clc_vec16_float16_t, DF16_, 16)
 #endif // cl_khr_fp16
 // float
 __AMDGCN_CLC_SUBGROUP_TO_VEC(float2, f, 2)
@@ -262,7 +262,7 @@ _Z31__spirv_SubgroupShuffleXorINTELIiET_S0_j(int Data,
 
 // Sub 32-bit types.
 #define __AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(TYPE)                               \
-  _CLC_OVERLOAD _CLC_DEF TYPE __spirv_SubgroupShuffleXor(                     \
+  _CLC_OVERLOAD _CLC_DEF TYPE __spirv_SubgroupShuffleXorINTEL(                \
       TYPE Data, unsigned int InvocationId) {                                 \
     return __spirv_SubgroupShuffleXorINTEL((int)Data, InvocationId);          \
   }
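The rename in this hunk is the functional fix: the macro previously stamped out overloads named __spirv_SubgroupShuffleXor, which does not match the INTEL-suffixed builtin name used everywhere else in the file. With the corrected name, the expansion for TYPE = short reads as follows (preprocessor output shown for illustration, not code added by the patch):

_CLC_OVERLOAD _CLC_DEF short __spirv_SubgroupShuffleXorINTEL(
    short Data, unsigned int InvocationId) {
  // Widen to int, reuse the 32-bit overload, and let the return
  // statement narrow the result back to short.
  return __spirv_SubgroupShuffleXorINTEL((int)Data, InvocationId);
}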
@@ -271,8 +271,8 @@ __AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(unsigned char);
 __AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(short);
 __AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(unsigned short);
 #ifdef cl_khr_fp16
-_CLC_OVERLOAD _CLC_DEF half
-__spirv_SubgroupShuffleXorINTEL(half Data, unsigned int InvocationId) {
+_CLC_OVERLOAD _CLC_DEF __clc_float16_t __spirv_SubgroupShuffleXorINTEL(
+    __clc_float16_t Data, unsigned int InvocationId) {
   unsigned short tmp = __clc_as_ushort(Data);
   tmp = (unsigned short)__spirv_SubgroupShuffleXorINTEL(tmp, InvocationId);
   return __clc_as_half(tmp);
@@ -296,8 +296,8 @@ __AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(unsigned char, h);
 __AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(short, s);
 __AMDGCN_CLC_SUBGROUP_XOR_SUB_I32(unsigned short, t);
 #ifdef cl_khr_fp16
-_CLC_DEF half _Z31__spirv_SubgroupShuffleXorINTELIDF16_ET_S0_j(
-    half Data, unsigned int InvocationId) {
+_CLC_DEF __clc_float16_t _Z31__spirv_SubgroupShuffleXorINTELIDF16_ET_S0_j(
+    __clc_float16_t Data, unsigned int InvocationId) {
   return __spirv_SubgroupShuffleXorINTEL(Data, InvocationId);
 }
 #endif // cl_khr_fp16
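Both the overloaded and the mangled scalar forms expose the same butterfly exchange: assuming the usual SPIR-V semantics of SubgroupShuffleXorINTEL, lane i reads the value held by lane i ^ InvocationId (xor_partner below is a hypothetical helper, not part of the library):

uint xor_partner(uint lane_id, uint value) {
  // value = 1 swaps adjacent lanes; value = half the sub-group size
  // swaps the low and high halves of the sub-group.
  return lane_id ^ value;
}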
@@ -409,10 +409,10 @@ __AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(float8, 8)
 __AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(float16, 16)
 // half
 #ifdef cl_khr_fp16
-__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(half2, 2)
-__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(half4, 4)
-__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(half8, 8)
-__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(half16, 16)
+__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(__clc_vec2_float16_t, 2)
+__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(__clc_vec4_float16_t, 4)
+__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(__clc_vec8_float16_t, 8)
+__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(__clc_vec16_float16_t, 16)
 #endif // cl_khr_fp16
 // double
 __AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(double2, 2)
@@ -470,10 +470,10 @@ __AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(float8, f, 8)
 __AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(float16, f, 16)
 // half
 #ifdef cl_khr_fp16
-__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(half2, DF16_, 2)
-__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(half4, DF16_, 4)
-__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(half8, DF16_, 8)
-__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(half16, DF16_, 16)
+__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(__clc_vec2_float16_t, DF16_, 2)
+__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(__clc_vec4_float16_t, DF16_, 4)
+__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(__clc_vec8_float16_t, DF16_, 8)
+__AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(__clc_vec16_float16_t, DF16_, 16)
 #endif // cl_khr_fp16
 // double
 __AMDGCN_CLC_SUBGROUP_XOR_TO_VEC(double2, d, 2)
@@ -521,11 +521,11 @@ __AMDGCN_CLC_SUBGROUP_UP_SUB_I32(char);
 __AMDGCN_CLC_SUBGROUP_UP_SUB_I32(unsigned char);
 __AMDGCN_CLC_SUBGROUP_UP_SUB_I32(short);
 __AMDGCN_CLC_SUBGROUP_UP_SUB_I32(unsigned short);
+
 // half
 #ifdef cl_khr_fp16
-_CLC_OVERLOAD _CLC_DEF half __spirv_SubgroupShuffleUpINTEL(half previous,
-                                                           half current,
-                                                           unsigned int delta) {
+_CLC_OVERLOAD _CLC_DEF __clc_float16_t __spirv_SubgroupShuffleUpINTEL(
+    __clc_float16_t previous, __clc_float16_t current, unsigned int delta) {
   unsigned short tmpP = __clc_as_ushort(previous);
   unsigned short tmpC = __clc_as_ushort(current);
   tmpC = __spirv_SubgroupShuffleUpINTEL(tmpP, tmpC, delta);
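The two-operand up-shuffle treats previous and current as one virtual buffer of twice the sub-group size. A hedged model of the indexing, assuming the cl_intel_subgroups definition (model_shuffle_up, sg_size, and the array view are illustrative only, not the library's implementation):

ushort model_shuffle_up(const ushort *previous, const ushort *current,
                        uint sg_size, uint i, uint delta) {
  // Lane i reads current[i - delta] when that index exists;
  // otherwise it wraps into the tail of previous.
  return (i >= delta) ? current[i - delta]
                      : previous[i + sg_size - delta];
}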
@@ -551,8 +551,8 @@ __AMDGCN_CLC_SUBGROUP_UP_SUB_I32(short, s);
 __AMDGCN_CLC_SUBGROUP_UP_SUB_I32(unsigned short, t);
 // half
 #ifdef cl_khr_fp16
-_CLC_DEF half _Z30__spirv_SubgroupShuffleUpINTELIDF16_ET_S0_S0_j(
-    half previous, half current, unsigned int delta) {
+_CLC_DEF __clc_float16_t _Z30__spirv_SubgroupShuffleUpINTELIDF16_ET_S0_S0_j(
+    __clc_float16_t previous, __clc_float16_t current, unsigned int delta) {
   return __spirv_SubgroupShuffleUpINTEL(previous, current, delta);
 }
 #endif // cl_khr_fp16
@@ -663,10 +663,10 @@ __AMDGCN_CLC_SUBGROUP_UP_TO_VEC(ulong8, 8)
 __AMDGCN_CLC_SUBGROUP_UP_TO_VEC(ulong16, 16)
 // half
 #ifdef cl_khr_fp16
-__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(half2, 2)
-__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(half4, 4)
-__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(half8, 8)
-__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(half16, 16)
+__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(__clc_vec2_float16_t, 2)
+__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(__clc_vec4_float16_t, 4)
+__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(__clc_vec8_float16_t, 8)
+__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(__clc_vec16_float16_t, 16)
 #endif // cl_khr_fp16
 // float
 __AMDGCN_CLC_SUBGROUP_UP_TO_VEC(float2, 2)
@@ -724,10 +724,10 @@ __AMDGCN_CLC_SUBGROUP_UP_TO_VEC(ulong8, m, 8)
 __AMDGCN_CLC_SUBGROUP_UP_TO_VEC(ulong16, m, 16)
 // half
 #ifdef cl_khr_fp16
-__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(half2, DF16_, 2)
-__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(half4, DF16_, 4)
-__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(half8, DF16_, 8)
-__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(half16, DF16_, 16)
+__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(__clc_vec2_float16_t, DF16_, 2)
+__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(__clc_vec4_float16_t, DF16_, 4)
+__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(__clc_vec8_float16_t, DF16_, 8)
+__AMDGCN_CLC_SUBGROUP_UP_TO_VEC(__clc_vec16_float16_t, DF16_, 16)
 #endif // cl_khr_fp16
 // float
 __AMDGCN_CLC_SUBGROUP_UP_TO_VEC(float2, f, 2)
@@ -782,8 +782,8 @@ __AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(short);
 __AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(unsigned short);
 // half
 #ifdef cl_khr_fp16
-_CLC_OVERLOAD _CLC_DEF half
-__spirv_SubgroupShuffleDownINTEL(half current, half next, unsigned int delta) {
+_CLC_OVERLOAD _CLC_DEF __clc_float16_t __spirv_SubgroupShuffleDownINTEL(
+    __clc_float16_t current, __clc_float16_t next, unsigned int delta) {
   unsigned short tmpC = __clc_as_ushort(current);
   unsigned short tmpN = __clc_as_ushort(next);
   tmpC = __spirv_SubgroupShuffleDownINTEL(tmpC, tmpN, delta);
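The down-shuffle mirrors that model in the other direction: in-range reads come from current, and reads past the end of the sub-group spill into next. The same illustrative array view, under the same assumptions (names hypothetical):

ushort model_shuffle_down(const ushort *current, const ushort *next,
                          uint sg_size, uint i, uint delta) {
  // Lane i reads current[i + delta] while it stays inside the
  // sub-group, and next[i + delta - sg_size] beyond that.
  return (i + delta < sg_size) ? current[i + delta]
                               : next[i + delta - sg_size];
}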
@@ -809,8 +809,8 @@ __AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(short, s);
 __AMDGCN_CLC_SUBGROUP_DOWN_TO_I32(unsigned short, t);
 // half
 #ifdef cl_khr_fp16
-_CLC_DEF half _Z32__spirv_SubgroupShuffleDownINTELIDF16_ET_S0_S0_j(
-    half current, half next, unsigned int delta) {
+_CLC_DEF __clc_float16_t _Z32__spirv_SubgroupShuffleDownINTELIDF16_ET_S0_S0_j(
+    __clc_float16_t current, __clc_float16_t next, unsigned int delta) {
   return __spirv_SubgroupShuffleDownINTEL(current, next, delta);
 }
 #endif // cl_khr_fp16
@@ -919,10 +919,10 @@ __AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(ulong8, 8)
 __AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(ulong16, 16)
 // half
 #ifdef cl_khr_fp16
-__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(half2, 2)
-__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(half4, 4)
-__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(half8, 8)
-__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(half16, 16)
+__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(__clc_vec2_float16_t, 2)
+__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(__clc_vec4_float16_t, 4)
+__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(__clc_vec8_float16_t, 8)
+__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(__clc_vec16_float16_t, 16)
 #endif // cl_khr_fp16
 // float
 __AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(float2, 2)
@@ -980,10 +980,10 @@ __AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(ulong8, m, 8)
 __AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(ulong16, m, 16)
 // half
 #ifdef cl_khr_fp16
-__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(half2, DF16_, 2)
-__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(half4, DF16_, 4)
-__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(half8, DF16_, 8)
-__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(half16, DF16_, 16)
+__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(__clc_vec2_float16_t, DF16_, 2)
+__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(__clc_vec4_float16_t, DF16_, 4)
+__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(__clc_vec8_float16_t, DF16_, 8)
+__AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(__clc_vec16_float16_t, DF16_, 16)
 #endif // cl_khr_fp16
 // float
 __AMDGCN_CLC_SUBGROUP_DOWN_TO_VEC(float2, f, 2)