Skip to content

Commit a7df1e1

Browse files
Authored by cmc-rep
[SYCL][ESIMD] Change API based on the feedback from users (#2476)
Signed-off-by: Gang Y Chen <[email protected]>
1 parent 638b71b commit a7df1e1

File tree

4 files changed: 17 additions and 14 deletions

sycl/include/CL/sycl/INTEL/esimd/esimd_memory.hpp

Lines changed: 13 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -421,11 +421,6 @@ ESIMD_NODEBUG ESIMD_INLINE
421421
/// generic work-group barrier
422422
inline ESIMD_NODEBUG void esimd_barrier() { __esimd_barrier(); }
423423

424-
/// SLM functions
425-
426-
/// declare per-work-group slm size
427-
SYCL_EXTERNAL void slm_init(uint32_t size);
428-
429424
enum EsimdFenceMask {
430425
ESIMD_GLOBAL_COHERENT_FENCE = 0x1,
431426
ESIMD_L3_FLUSH_INSTRUCTIONS = 0x2,
@@ -437,8 +432,15 @@ enum EsimdFenceMask {
437432
ESIMD_SW_BARRIER = 0x80
438433
};
439434

440-
/// slm_fence sets the SLM read/write order
441-
inline ESIMD_NODEBUG void slm_fence(uint8_t cntl) { __esimd_slm_fence(cntl); }
435+
/// esimd_fence sets the memory read/write order
436+
ESIMD_INLINE ESIMD_NODEBUG void esimd_fence(uint8_t cntl) {
437+
__esimd_slm_fence(cntl);
438+
}
439+
440+
/// SLM functions
441+
442+
/// declare per-work-group slm size
443+
SYCL_EXTERNAL void slm_init(uint32_t size);
442444

443445
/// SLM gather
444446
/// only allow simd-16 and simd-32
@@ -459,18 +461,19 @@ ESIMD_INLINE ESIMD_NODEBUG
459461
}
460462

461463
/// SLM gather4
462-
/// only allow simd-16 and simd-32
464+
/// only allow simd-8, simd-16 and simd-32
463465
template <typename T, int n, ChannelMaskType Mask>
464466
ESIMD_INLINE ESIMD_NODEBUG
465-
typename std::enable_if<(n == 16 || n == 32) && (sizeof(T) == 4),
467+
typename std::enable_if<(n == 8 || n == 16 || n == 32) && (sizeof(T) == 4),
466468
simd<T, n * NumChannels(Mask)>>::type
467469
slm_load4(simd<uint32_t, n> offsets, simd<uint16_t, n> pred = 1) {
468470
return __esimd_slm_read4<T, n, Mask>(offsets.data(), pred.data());
469471
}
470472

471473
/// SLM scatter4
472474
template <typename T, int n, ChannelMaskType Mask>
473-
typename std::enable_if<(n == 16 || n == 32) && (sizeof(T) == 4), void>::type
475+
typename std::enable_if<(n == 8 || n == 16 || n == 32) && (sizeof(T) == 4),
476+
void>::type
474477
slm_store4(simd<T, n * NumChannels(Mask)> vals, simd<uint32_t, n> offsets,
475478
simd<uint16_t, n> pred = 1) {
476479
__esimd_slm_write4<T, n, Mask>(offsets.data(), vals.data(), pred.data());

sycl/test/basic_tests/esimd/slm_load.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ void kernel() __attribute__((sycl_device)) {
1515

1616
auto v0 = slm_load<int, 32>(offsets);
1717

18-
slm_fence(3);
18+
esimd_fence(3);
1919
esimd_barrier();
2020

2121
v0 = v0 + v1;

sycl/test/esimd/on-device/histogram_256_slm.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ ESIMD_INLINE void histogram_atomic(const uint32_t *input_ptr, uint32_t *output,
3838
slm_offset *= sizeof(int);
3939
simd<uint, 16> slm_data = 0;
4040
slm_store<uint, 16>(slm_data, slm_offset);
41-
slm_fence(ESIMD_GLOBAL_COHERENT_FENCE);
41+
esimd_fence(ESIMD_GLOBAL_COHERENT_FENCE);
4242
esimd_barrier();
4343

4444
// Each thread handles NUM_BLOCKSxBLOCK_WIDTH pixel blocks
@@ -57,7 +57,7 @@ ESIMD_INLINE void histogram_atomic(const uint32_t *input_ptr, uint32_t *output,
5757
}
5858
start_off += BLOCK_WIDTH;
5959
}
60-
slm_fence(ESIMD_GLOBAL_COHERENT_FENCE);
60+
esimd_fence(ESIMD_GLOBAL_COHERENT_FENCE);
6161
esimd_barrier();
6262

6363
// Update global sum by atomically adding each local histogram

sycl/test/esimd/on-device/slm_barrier.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ void load_to_slm(uint grpSize, uint localId, uint slmOffset, char *addr,
6969
vOffsets += (grpSize * 256);
7070
}
7171

72-
slm_fence(ESIMD_GLOBAL_COHERENT_FENCE);
72+
esimd_fence(ESIMD_GLOBAL_COHERENT_FENCE);
7373
esimd_barrier();
7474
}
7575

0 commit comments

Comments
 (0)