Skip to content

Commit 732022d

Browse files
authored
Fix sycl lint error and tests (#13)
* fix sycl nd
  Signed-off-by: jiqing-feng <[email protected]>
* fix tests
  Signed-off-by: jiqing-feng <[email protected]>
---------
Signed-off-by: jiqing-feng <[email protected]>
1 parent 883d693 commit 732022d

File tree

5 files changed

+11
-11
lines changed

5 files changed

+11
-11
lines changed

csrc/xpu_kernels.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ inline float dDequantizeNF4(unsigned char val) {
9494
}
9595

9696
template <typename T, int TILE_SIZE, int NUM_PER_TH, int DATA_TYPE>
97-
SYCL_EXTERNAL void kDequantizeBlockwise<T, TILE_SIZE, NUM_PER_TH, DATA_TYPE>::operator()(sycl::and_item<1> item) const {
97+
SYCL_EXTERNAL void kDequantizeBlockwise<T, TILE_SIZE, NUM_PER_TH, DATA_TYPE>::operator()(sycl::nd_item<1> item) const {
9898
const int base_idx = item.get_group(0) * TILE_SIZE;
9999
size_t local_idx = item.get_local_id(0) * NUM_PER_TH;
100100
float local_abs_max = -FLT_MAX;
@@ -172,7 +172,7 @@ SYCL_EXTERNAL void kDequantizeBlockwise<T, TILE_SIZE, NUM_PER_TH, DATA_TYPE>::op
172172

173173
template <typename T, size_t GROUP_SIZE, size_t NUM_PER_THREAD, size_t SUBG_SIZE, int BITS>
174174
SYCL_EXTERNAL void
175-
kgemv_4bit_inference<T, GROUP_SIZE, NUM_PER_THREAD, SUBG_SIZE, BITS>::operator()(sycl::and_item<1> item) const {
175+
kgemv_4bit_inference<T, GROUP_SIZE, NUM_PER_THREAD, SUBG_SIZE, BITS>::operator()(sycl::nd_item<1> item) const {
176176
size_t idx = item.get_local_id();
177177
const int sg_idx = idx / SUBG_SIZE;
178178
const int sg_lane = idx % SUBG_SIZE;

csrc/xpu_kernels.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
template <typename T, int TILE_SIZE, int NUM_PER_TH, int DATA_TYPE> class kDequantizeBlockwise {
88
public:
9-
SYCL_EXTERNAL void operator()(sycl::and_item<1> item) const;
9+
SYCL_EXTERNAL void operator()(sycl::nd_item<1> item) const;
1010

1111
kDequantizeBlockwise(float* code_, uint8_t* A_, float* absmax_, T* out_, const int blocksize_, const int n_)
1212
: code(code_), A(A_), absmax(absmax_), out(out_), blocksize(blocksize_), n(n_) {}
@@ -22,7 +22,7 @@ template <typename T, int TILE_SIZE, int NUM_PER_TH, int DATA_TYPE> class kDequa
2222

2323
template <typename T, size_t GROUP_SIZE, size_t NUM_PER_THREAD, size_t SUBG_SIZE, int BITS> class kgemv_4bit_inference {
2424
public:
25-
SYCL_EXTERNAL void operator()(sycl::and_item<1> item) const;
25+
SYCL_EXTERNAL void operator()(sycl::nd_item<1> item) const;
2626

2727
kgemv_4bit_inference(
2828
int M_, int N_, int K_, T* A_, unsigned char* B_, float* absmax_, const float* datatype_, T* out_, int lda_,

csrc/xpu_ops.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,15 @@ void dequantizeBlockwise(
1616
sycl::range<1> global_range{(size_t)workgroup_num * (size_t)workgroup_size};
1717
kDequantizeBlockwise<T, tile_size, num_per_th, DATA_TYPE> kfn(code, A, absmax, out, blocksize / 2, n);
1818
sycl_kernel_submit<decltype(kfn), 1, 32>(
19-
sycl::and_range<1>(sycl::range<1>(global_range), sycl::range<1>(local_range)), queue, kfn
19+
sycl::nd_range<1>(sycl::range<1>(global_range), sycl::range<1>(local_range)), queue, kfn
2020
);
2121
} else {
2222
const int workgroup_num = (n + tile_size - 1) / tile_size;
2323
sycl::range<1> local_range{(size_t)workgroup_size};
2424
sycl::range<1> global_range{(size_t)workgroup_num * (size_t)workgroup_size};
2525
kDequantizeBlockwise<T, tile_size, num_per_th, DATA_TYPE> kfn(code, A, absmax, out, blocksize, n);
2626
sycl_kernel_submit<decltype(kfn), 1, 32>(
27-
sycl::and_range<1>(sycl::range<1>(global_range), sycl::range<1>(local_range)), queue, kfn
27+
sycl::nd_range<1>(sycl::range<1>(global_range), sycl::range<1>(local_range)), queue, kfn
2828
);
2929
}
3030
}
@@ -47,7 +47,7 @@ void gemv_4bit_inference(
4747
);
4848

4949
sycl_comp_kernel_submit<decltype(kfn), 1, SUBG_SIZE>(
50-
sycl::and_range<1>(sycl::range<1>(GROUP_SIZE * workgroup_num), sycl::range<1>(GROUP_SIZE)), queue, kfn
50+
sycl::nd_range<1>(sycl::range<1>(GROUP_SIZE * workgroup_num), sycl::range<1>(GROUP_SIZE)), queue, kfn
5151
);
5252
}
5353

csrc/xpu_ops.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,14 @@
1212
#include <sycl/sycl.hpp>
1313

1414
template <typename ker_t, int dim, int subgroup_size>
15-
static inline void sycl_kernel_submit(sycl::and_range<dim> range, sycl::queue q, ker_t ker) {
15+
static inline void sycl_kernel_submit(sycl::nd_range<dim> range, sycl::queue q, ker_t ker) {
1616
auto cgf = [&](::sycl::handler& cgh)
1717
[[sycl::reqd_sub_group_size(subgroup_size)]] { cgh.parallel_for<ker_t>(range, ker); };
1818
q.submit(cgf);
1919
}
2020

2121
template <typename ker_t, int dim, int subgroup_size>
22-
static inline void sycl_comp_kernel_submit(sycl::and_range<dim> range, sycl::queue q, ker_t ker) {
22+
static inline void sycl_comp_kernel_submit(sycl::nd_range<dim> range, sycl::queue q, ker_t ker) {
2323
auto cgf = [&](::sycl::handler& cgh) [[sycl::reqd_sub_group_size(subgroup_size)]] {
2424
ker.sycl_ker_local_memory_creation(cgh);
2525
cgh.parallel_for<ker_t>(range, ker);

tests/test_functional.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1238,8 +1238,8 @@ def test_gemv_4bit(self, device, dim, dtype, storage_type, quant_storage, double
12381238
max_errs3 = []
12391239

12401240
# Large number of iterations is excessive and slow on CPU.
1241-
# Keep for CUDA for now.
1242-
iters = 100 if device == "cuda" else 10
1241+
# Keep for CUDA/XPU for now.
1242+
iters = 10 if device == "cpu" else 100
12431243

12441244
for i in range(iters):
12451245
if kind == "fc1":

0 commit comments

Comments
 (0)