Skip to content

Commit 6a82721

Browse files
committed
Fix build, add shuffle
1 parent 6f4233b commit 6a82721

File tree

6 files changed

+51
-10
lines changed

6 files changed

+51
-10
lines changed

llvm/lib/Transforms/Instrumentation/GPUSan.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -618,7 +618,6 @@ void GPUSanImpl::instrumentAccess(LoopInfo &LI, Instruction &I, int PtrIdx,
618618
if (Loop *L = LI.getLoopFor(I.getParent())) {
619619
auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(*I.getFunction());
620620
const auto &LD = SE.getLoopDisposition(SE.getSCEVAtScope(PtrOp, L), L);
621-
LD->
622621
}
623622

624623
static int32_t ReadAccessId = -1;

offload/DeviceRTL/include/DeviceUtils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ namespace utils {
2222

2323
/// Return the value \p Var from thread Id \p SrcLane in the warp if the thread
2424
/// is identified by \p Mask.
25-
int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane);
25+
int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width);
2626

2727
int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width);
2828

offload/DeviceRTL/src/Mapping.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,18 @@ uint64_t ompx_ballot_sync(uint64_t mask, int pred) {
376376
return utils::ballotSync(mask, pred);
377377
}
378378

379+
#define _TGT_KERNEL_LANGUAGE_SHFL_SYNC(TYPE, TY) \
380+
int ompx_shfl_sync_##TY(uint64_t mask, TYPE var, int src, int width) { \
381+
return utils::shuffle(mask, var, src, width); \
382+
}
383+
384+
_TGT_KERNEL_LANGUAGE_SHFL_SYNC(int, i)
385+
_TGT_KERNEL_LANGUAGE_SHFL_SYNC(float, f)
386+
_TGT_KERNEL_LANGUAGE_SHFL_SYNC(long, l)
387+
_TGT_KERNEL_LANGUAGE_SHFL_SYNC(double, d)
388+
389+
#undef _TGT_KERNEL_LANGUAGE_SHFL_SYNC
390+
379391
int ompx_shfl_down_sync_i(uint64_t mask, int var, unsigned delta, int width) {
380392
return utils::shuffleDown(mask, var, delta, width);
381393
}

offload/DeviceRTL/src/Utils.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ uint64_t Pack(uint32_t LowBits, uint32_t HighBits) {
3434
return (((uint64_t)HighBits) << 32) | (uint64_t)LowBits;
3535
}
3636

37-
int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane);
37+
int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width);
3838
int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
3939
int32_t Width);
4040

@@ -45,8 +45,7 @@ uint64_t ballotSync(uint64_t Mask, int32_t Pred);
4545
///{
4646
#pragma omp begin declare variant match(device = {arch(amdgcn)})
4747

48-
int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
49-
int Width = mapping::getWarpSize();
48+
int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width) {
5049
int Self = mapping::getThreadIdInWarp();
5150
int Index = SrcLane + (Self & ~(Width - 1));
5251
return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
@@ -82,8 +81,8 @@ bool isThreadLocalMemPtr(const void *Ptr) {
8281
device = {arch(nvptx, nvptx64)}, \
8382
implementation = {extension(match_any)})
8483

85-
int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
86-
return __nvvm_shfl_sync_idx_i32(Mask, Var, SrcLane, 0x1f);
84+
int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width) {
85+
// The last operand of the shfl intrinsic is a packed control word, not the
// raw width: the segment mask (warp size - Width) sits above bit 8 and the
// low bits hold the lane clamp 0x1f. When Width equals the warp size this
// reduces to 0x1f, the value that was passed before Width was a parameter.
return __nvvm_shfl_sync_idx_i32(Mask, Var, SrcLane,
((mapping::getWarpSize() - Width) << 8) | 0x1f);
8786
}
8887

8988
int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width) {
@@ -111,8 +110,9 @@ void utils::unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits) {
111110
impl::Unpack(Val, &LowBits, &HighBits);
112111
}
113112

114-
int32_t utils::shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
115-
return impl::shuffle(Mask, Var, SrcLane);
113+
int32_t utils::shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane,
114+
int32_t Width) {
115+
return impl::shuffle(Mask, Var, SrcLane, Width);
116116
}
117117

118118
int32_t utils::shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta,

offload/DeviceRTL/src/Workshare.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,7 @@ template <typename T, typename ST> struct omptarget_nvptx_LoopSupport {
348348
if (rank == 0) {
349349
warp_res = atomic::add(&Cnt, change, atomic::seq_cst);
350350
}
351-
warp_res = utils::shuffle(active, warp_res, leader);
351+
warp_res = utils::shuffle(active, warp_res, leader, mapping::getWarpSize());
352352
return warp_res + rank;
353353
}
354354

openmp/runtime/src/include/ompx.h.var

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,20 @@ _TGT_KERNEL_LANGUAGE_DECL_GRID_C(grid_dim)
139139

140140
uint64_t ompx_ballot_sync(uint64_t mask, int pred);
141141

142+
/// ompx_shfl_sync_{i,f,l,d}
143+
///{
144+
#define _TGT_KERNEL_LANGUAGE_SHFL_SYNC(TYPE, TY) \
145+
TYPE ompx_shfl_sync_##TY(uint64_t mask, TYPE var, int src, \
146+
int width);
147+
148+
_TGT_KERNEL_LANGUAGE_SHFL_SYNC(int, i)
149+
_TGT_KERNEL_LANGUAGE_SHFL_SYNC(float, f)
150+
_TGT_KERNEL_LANGUAGE_SHFL_SYNC(long, l)
151+
_TGT_KERNEL_LANGUAGE_SHFL_SYNC(double, d)
152+
153+
#undef _TGT_KERNEL_LANGUAGE_SHFL_SYNC
154+
///}
155+
142156
/// ompx_shfl_down_sync_{i,f,l,d}
143157
///{
144158
#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY) \
@@ -208,6 +222,22 @@ static inline uint64_t ballot_sync(uint64_t mask, int pred) {
208222
return ompx_ballot_sync(mask, pred);
209223
}
210224

225+
/// shfl_sync
226+
///{
227+
#define _TGT_KERNEL_LANGUAGE_SHFL_SYNC(TYPE, TY) \
228+
static inline TYPE shfl_sync(uint64_t mask, TYPE var, int src, \
229+
int width = __WARP_SIZE) { \
230+
/* Forward to the indexed shuffle entry point with the source lane. */ \
return ompx_shfl_sync_##TY(mask, var, src, width); \
231+
}
232+
233+
_TGT_KERNEL_LANGUAGE_SHFL_SYNC(int, i)
234+
_TGT_KERNEL_LANGUAGE_SHFL_SYNC(float, f)
235+
_TGT_KERNEL_LANGUAGE_SHFL_SYNC(long, l)
236+
_TGT_KERNEL_LANGUAGE_SHFL_SYNC(double, d)
237+
238+
// Undefine the helper macro that generated the shfl_sync wrappers so it does
// not leak out of this header.
#undef _TGT_KERNEL_LANGUAGE_SHFL_SYNC
239+
///}
240+
211241
/// shfl_down_sync
212242
///{
213243
#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY) \

0 commit comments

Comments
 (0)