Skip to content

Commit 7cb267a

Browse files
adarshyoga authored and Diptorup Deb committed
tune gpairs params
1 parent 8128dd2 commit 7cb267a

File tree

2 files changed

+8
-8
lines changed

2 files changed

+8
-8
lines changed

dpbench/benchmarks/gpairs/gpairs_numba_dpex_k.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@ def count_weighted_pairs_3d_intel_no_slm_ker(
3737
lws0 = nd_item.get_local_range(0)
3838
lws1 = nd_item.get_local_range(1)
3939

40-
n_wi = 20
40+
n_wi = 32
4141

42-
dsq_mat = dpex.private.array(shape=(20 * 20), dtype=dtype)
43-
w0_vec = dpex.private.array(shape=(20), dtype=dtype)
44-
w1_vec = dpex.private.array(shape=(20), dtype=dtype)
42+
dsq_mat = dpex.private.array(shape=(32 * 32), dtype=dtype)
43+
w0_vec = dpex.private.array(shape=(32), dtype=dtype)
44+
w1_vec = dpex.private.array(shape=(32), dtype=dtype)
4545

4646
offset0 = gr0 * n_wi * lws0 + lid0
4747
offset1 = gr1 * n_wi * lws1 + lid1
@@ -81,7 +81,7 @@ def count_weighted_pairs_3d_intel_no_slm_ker(
8181

8282
# update slm_hist. Use work-item private buffer of private_hist_size tfloat elements
8383
for k in range(0, slm_hist_size, private_hist_size):
84-
private_hist = dpex.private.array(shape=(16), dtype=dtype)
84+
private_hist = dpex.private.array(shape=(32), dtype=dtype)
8585
for p in range(private_hist_size):
8686
private_hist[p] = 0.0
8787

@@ -133,8 +133,8 @@ def gpairs(
133133
rbins,
134134
results,
135135
):
136-
n_wi = 20
137-
private_hist_size = 16
136+
n_wi = 32
137+
private_hist_size = 32
138138
lws0 = 16
139139
lws1 = 16
140140

dpbench/benchmarks/gpairs/gpairs_sycl_native_ext/gpairs_sycl/_gpairs_kernel.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ sycl::event gpairs_impl(sycl::queue q,
2525
FpTy *hist)
2626
{
2727

28-
const unsigned int n_wi = 20, private_hist_size = 16, lws0 = 16, lws1 = 16;
28+
const unsigned int n_wi = 32, private_hist_size = 32, lws0 = 16, lws1 = 16;
2929
const size_t m0 = static_cast<size_t>(n_wi) * static_cast<size_t>(lws0);
3030
const size_t m1 = static_cast<size_t>(n_wi) * static_cast<size_t>(lws1);
3131
const size_t n_groups0 = ceiling_quotient(n, m0);

0 commit comments

Comments
 (0)