@@ -116,19 +116,20 @@ sycl::event inclusive_scan_rec(sycl::queue &exec_q,
116
116
{
117
117
size_t n_groups = ceiling_quotient (n_elems, n_wi * wg_size);
118
118
119
- sycl::event inc_scan_phase1_ev = exec_q.submit ([&](sycl::handler &cgh) {
120
- cgh.depends_on (depends);
119
+ const sycl::event &inc_scan_phase1_ev =
120
+ exec_q.submit ([&](sycl::handler &cgh) {
121
+ cgh.depends_on (depends);
121
122
122
- using slmT = sycl::local_accessor<size_t , 1 >;
123
+ using slmT = sycl::local_accessor<size_t , 1 >;
123
124
124
- auto lws = sycl::range<1 >(wg_size);
125
- auto gws = sycl::range<1 >(n_groups * wg_size);
125
+ auto lws = sycl::range<1 >(wg_size);
126
+ auto gws = sycl::range<1 >(n_groups * wg_size);
126
127
127
- slmT slm_iscan_tmp (lws, cgh);
128
+ slmT slm_iscan_tmp (lws, cgh);
128
129
129
130
cgh.parallel_for <class inclusive_scan_rec_local_scan_krn <
130
131
inputT, outputT, n_wi, IndexerT, decltype (transformer)>>(
131
- sycl::nd_range<1 >(gws, lws), [=](sycl::nd_item<1 > it)
132
+ sycl::nd_range<1 >(gws, lws), [=, slm_iscan_tmp = std::move (slm_iscan_tmp) ](sycl::nd_item<1 > it)
132
133
{
133
134
auto chunk_gid = it.get_global_id (0 );
134
135
auto lid = it.get_local_id (0 );
@@ -172,7 +173,7 @@ sycl::event inclusive_scan_rec(sycl::queue &exec_q,
172
173
output[i + m_wi] = local_isum[m_wi];
173
174
}
174
175
});
175
- });
176
+ });
176
177
177
178
sycl::event out_event = inc_scan_phase1_ev;
178
179
if (n_groups > 1 ) {
@@ -203,11 +204,11 @@ sycl::event inclusive_scan_rec(sycl::queue &exec_q,
203
204
204
205
sycl::event e4 = exec_q.submit ([&](sycl::handler &cgh) {
205
206
cgh.depends_on (e3 );
206
- auto ctx = exec_q.get_context ();
207
+ const auto & ctx = exec_q.get_context ();
207
208
cgh.host_task ([ctx, temp]() { sycl::free (temp, ctx); });
208
209
});
209
210
210
- out_event = e4 ;
211
+ out_event = std::move ( e4 ) ;
211
212
}
212
213
213
214
return out_event;
@@ -235,7 +236,7 @@ size_t accumulate_contig_impl(sycl::queue &q,
235
236
NoOpIndexer flat_indexer{};
236
237
transformerT non_zero_indicator{};
237
238
238
- sycl::event comp_ev =
239
+ const sycl::event & comp_ev =
239
240
inclusive_scan_rec<maskT, cumsumT, n_wi, decltype (flat_indexer),
240
241
decltype (non_zero_indicator)>(
241
242
q, n_elems, wg_size, mask_data_ptr, cumsum_data_ptr, 0 , 1 ,
@@ -321,7 +322,7 @@ size_t accumulate_strided_impl(sycl::queue &q,
321
322
StridedIndexer strided_indexer{nd, 0 , shape_strides};
322
323
transformerT non_zero_indicator{};
323
324
324
- sycl::event comp_ev =
325
+ const sycl::event & comp_ev =
325
326
inclusive_scan_rec<maskT, cumsumT, n_wi, decltype (strided_indexer),
326
327
decltype (non_zero_indicator)>(
327
328
q, n_elems, wg_size, mask_data_ptr, cumsum_data_ptr, 0 , 1 ,
0 commit comments