Skip to content

Commit 79df15f

Browse files
committed
Directly use __builtin_amdgcn_mov_dpp8
The builtin now supports overloads. Change-Id: Id34283800db05d9707d055f7a46fdce1fed3542a
1 parent 658cd57 commit 79df15f

File tree

1 file changed

+6
-12
lines changed

1 file changed

+6
-12
lines changed

amd/device-libs/ockl/src/wfredscan.cl

Lines changed: 6 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -69,18 +69,12 @@
69 69

70 70
// DPP8
71 71
#define uint_dpp8(X,S) __builtin_amdgcn_mov_dpp8(X,S)
72-
#define ulong_dpp8(X,S) ({ \
73-
uint2 __x = AS_UINT2(X); \
74-
uint2 __r; \
75-
__r.lo = uint_dpp8(__x.lo, S); \
76-
__r.hi = uint_dpp8(__x.hi, S); \
77-
AS_ULONG(__r); \
78-
})
79-
#define int_dpp8(X,S) AS_INT(uint_dpp8(AS_UINT(X),S))
80-
#define long_dpp8(X,S) AS_LONG(ulong_dpp8(AS_ULONG(X),S))
81-
#define float_dpp8(X,S) AS_FLOAT(uint_dpp8(AS_UINT(X),S))
82-
#define double_dpp8(X,S) AS_DOUBLE(ulong_dpp8(AS_ULONG(X),S))
83-
#define half_dpp8(X,S) AS_HALF((ushort)uint_dpp8((uint)AS_USHORT(X),S))
72+
#define ulong_dpp8(X,S) __builtin_amdgcn_mov_dpp8(X,S)
73+
#define int_dpp8(X,S) __builtin_amdgcn_mov_dpp8(X,S)
74+
#define long_dpp8(X,S) __builtin_amdgcn_mov_dpp8(X,S)
75+
#define float_dpp8(X,S) __builtin_amdgcn_mov_dpp8(X,S)
76+
#define double_dpp8(X,S) __builtin_amdgcn_mov_dpp8(X,S)
77+
#define half_dpp8(X,S) __builtin_amdgcn_mov_dpp8(X,S)
84 78

85 79
// permlane16
86 80
#define uint_permlane16(ID,X,S0,S1,W) __builtin_amdgcn_permlane16(ID,X,S0,S1,false,W)

0 commit comments

Comments
 (0)