Skip to content

Commit c4d1a88

Browse files
authored
Merge pull request #797 from u-s/feature/copy-with-wait-list
Added wait_list parameter to "copy" functions
2 parents 8c7412f + 3de4bba commit c4d1a88

File tree

6 files changed

+128
-70
lines changed

6 files changed

+128
-70
lines changed

include/boost/compute/algorithm/copy.hpp

Lines changed: 47 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ dispatch_copy_async(InputIterator first,
103103
InputIterator last,
104104
OutputIterator result,
105105
command_queue &queue,
106+
const wait_list &events,
106107
typename boost::enable_if<
107108
mpl::and_<
108109
mpl::not_<
@@ -118,7 +119,7 @@ dispatch_copy_async(InputIterator first,
118119
"copy_async() is only supported for contiguous host iterators"
119120
);
120121

121-
return copy_to_device_async(first, last, result, queue);
122+
return copy_to_device_async(first, last, result, queue, events);
122123
}
123124

124125
// host -> device (async)
@@ -129,6 +130,7 @@ dispatch_copy_async(InputIterator first,
129130
InputIterator last,
130131
OutputIterator result,
131132
command_queue &queue,
133+
const wait_list &events,
132134
typename boost::enable_if<
133135
mpl::and_<
134136
mpl::not_<
@@ -167,7 +169,7 @@ dispatch_copy_async(InputIterator first,
167169
context
168170
);
169171
return copy_on_device_async(
170-
mapped_host.begin(), mapped_host.end(), result, queue
172+
mapped_host.begin(), mapped_host.end(), result, queue, events
171173
);
172174
}
173175

@@ -179,6 +181,7 @@ dispatch_copy(InputIterator first,
179181
InputIterator last,
180182
OutputIterator result,
181183
command_queue &queue,
184+
const wait_list &events,
182185
typename boost::enable_if<
183186
mpl::and_<
184187
mpl::not_<
@@ -190,7 +193,7 @@ dispatch_copy(InputIterator first,
190193
>
191194
>::type* = 0)
192195
{
193-
return copy_to_device(first, last, result, queue);
196+
return copy_to_device(first, last, result, queue, events);
194197
}
195198

196199
// host -> device
@@ -202,6 +205,7 @@ dispatch_copy(InputIterator first,
202205
InputIterator last,
203206
OutputIterator result,
204207
command_queue &queue,
208+
const wait_list &events,
205209
typename boost::enable_if<
206210
mpl::and_<
207211
mpl::not_<
@@ -258,13 +262,15 @@ dispatch_copy(InputIterator first,
258262

259263
// [0; map_copy_threshold) -> copy_to_device_map()
260264
if(input_size_bytes < map_copy_threshold) {
261-
return copy_to_device_map(first, last, result, queue);
265+
return copy_to_device_map(first, last, result, queue, events);
262266
}
263267
// [map_copy_threshold; direct_copy_threshold) -> convert [first; last)
264268
// on host and then perform copy_to_device()
265269
else if(input_size_bytes < direct_copy_threshold) {
266270
std::vector<output_type> vector(first, last);
267-
return copy_to_device(vector.begin(), vector.end(), result, queue);
271+
return copy_to_device(
272+
vector.begin(), vector.end(), result, queue, events
273+
);
268274
}
269275

270276
// [direct_copy_threshold; inf) -> map [first; last) to device and
@@ -275,7 +281,7 @@ dispatch_copy(InputIterator first,
275281
// return the result.
276282
// At this point we are sure that count > 1 (first != last), so event
277283
// returned by dispatch_copy_async() must be valid.
278-
return dispatch_copy_async(first, last, result, queue).get();
284+
return dispatch_copy_async(first, last, result, queue, events).get();
279285
}
280286

281287
// host -> device
@@ -286,6 +292,7 @@ dispatch_copy(InputIterator first,
286292
InputIterator last,
287293
OutputIterator result,
288294
command_queue &queue,
295+
const wait_list &events,
289296
typename boost::enable_if<
290297
mpl::and_<
291298
mpl::not_<
@@ -345,12 +352,12 @@ dispatch_copy(InputIterator first,
345352
// copy_to_device_map() is used for every input
346353
if(input_size_bytes < map_copy_threshold
347354
|| direct_copy_threshold <= map_copy_threshold) {
348-
return copy_to_device_map(first, last, result, queue);
355+
return copy_to_device_map(first, last, result, queue, events);
349356
}
350357
// [map_copy_threshold; inf) -> convert [first; last)
351358
// on host and then perform copy_to_device()
352359
std::vector<output_type> vector(first, last);
353-
return copy_to_device(vector.begin(), vector.end(), result, queue);
360+
return copy_to_device(vector.begin(), vector.end(), result, queue, events);
354361
}
355362

356363
// device -> host (async)
@@ -360,6 +367,7 @@ dispatch_copy_async(InputIterator first,
360367
InputIterator last,
361368
OutputIterator result,
362369
command_queue &queue,
370+
const wait_list &events,
363371
typename boost::enable_if<
364372
mpl::and_<
365373
is_device_iterator<InputIterator>,
@@ -375,7 +383,7 @@ dispatch_copy_async(InputIterator first,
375383
"copy_async() is only supported for contiguous host iterators"
376384
);
377385

378-
return copy_to_host_async(first, last, result, queue);
386+
return copy_to_host_async(first, last, result, queue, events);
379387
}
380388

381389
// device -> host (async)
@@ -386,6 +394,7 @@ dispatch_copy_async(InputIterator first,
386394
InputIterator last,
387395
OutputIterator result,
388396
command_queue &queue,
397+
const wait_list &events,
389398
typename boost::enable_if<
390399
mpl::and_<
391400
is_device_iterator<InputIterator>,
@@ -426,7 +435,8 @@ dispatch_copy_async(InputIterator first,
426435
first,
427436
last,
428437
make_buffer_iterator<output_type>(mapped_host),
429-
queue
438+
queue,
439+
events
430440
);
431441
// update host memory asynchronously by mapping and unmapping memory
432442
event map_event;
@@ -451,6 +461,7 @@ dispatch_copy(InputIterator first,
451461
InputIterator last,
452462
OutputIterator result,
453463
command_queue &queue,
464+
const wait_list &events,
454465
typename boost::enable_if<
455466
mpl::and_<
456467
is_device_iterator<InputIterator>,
@@ -465,7 +476,7 @@ dispatch_copy(InputIterator first,
465476
>
466477
>::type* = 0)
467478
{
468-
return copy_to_host(first, last, result, queue);
479+
return copy_to_host(first, last, result, queue, events);
469480
}
470481

471482
// device -> host
@@ -478,6 +489,7 @@ dispatch_copy(InputIterator first,
478489
InputIterator last,
479490
OutputIterator result,
480491
command_queue &queue,
492+
const wait_list &events,
481493
typename boost::enable_if<
482494
mpl::and_<
483495
is_device_iterator<InputIterator>,
@@ -540,12 +552,12 @@ dispatch_copy(InputIterator first,
540552
// copy_to_host_map() is used for every input
541553
if(input_size_bytes < map_copy_threshold
542554
|| direct_copy_threshold <= map_copy_threshold) {
543-
return copy_to_host_map(first, last, result, queue);
555+
return copy_to_host_map(first, last, result, queue, events);
544556
}
545557
// [map_copy_threshold; inf) -> copy [first;last) to temporary vector
546558
// then copy (and convert) to result using std::copy()
547559
std::vector<input_type> vector(count);
548-
copy_to_host(first, last, vector.begin(), queue);
560+
copy_to_host(first, last, vector.begin(), queue, events);
549561
return std::copy(vector.begin(), vector.end(), result);
550562
}
551563

@@ -559,6 +571,7 @@ dispatch_copy(InputIterator first,
559571
InputIterator last,
560572
OutputIterator result,
561573
command_queue &queue,
574+
const wait_list &events,
562575
typename boost::enable_if<
563576
mpl::and_<
564577
is_device_iterator<InputIterator>,
@@ -618,13 +631,13 @@ dispatch_copy(InputIterator first,
618631

619632
// [0; map_copy_threshold) -> copy_to_host_map()
620633
if(input_size_bytes < map_copy_threshold) {
621-
return copy_to_host_map(first, last, result, queue);
634+
return copy_to_host_map(first, last, result, queue, events);
622635
}
623636
// [map_copy_threshold; direct_copy_threshold) -> copy [first;last) to
624637
// temporary vector then copy (and convert) to result using std::copy()
625638
else if(input_size_bytes < direct_copy_threshold) {
626639
std::vector<input_type> vector(count);
627-
copy_to_host(first, last, vector.begin(), queue);
640+
copy_to_host(first, last, vector.begin(), queue, events);
628641
return std::copy(vector.begin(), vector.end(), result);
629642
}
630643

@@ -636,7 +649,7 @@ dispatch_copy(InputIterator first,
636649
// return the result.
637650
// At this point we are sure that count > 1 (first != last), so event
638651
// returned by dispatch_copy_async() must be valid.
639-
return dispatch_copy_async(first, last, result, queue).get();
652+
return dispatch_copy_async(first, last, result, queue, events).get();
640653
}
641654

642655
// device -> device
@@ -646,6 +659,7 @@ dispatch_copy(InputIterator first,
646659
InputIterator last,
647660
OutputIterator result,
648661
command_queue &queue,
662+
const wait_list &events,
649663
typename boost::enable_if<
650664
mpl::and_<
651665
is_device_iterator<InputIterator>,
@@ -658,7 +672,7 @@ dispatch_copy(InputIterator first,
658672
>
659673
>::type* = 0)
660674
{
661-
return copy_on_device(first, last, result, queue);
675+
return copy_on_device(first, last, result, queue, events);
662676
}
663677

664678
// device -> device (specialization for buffer iterators)
@@ -668,6 +682,7 @@ dispatch_copy(InputIterator first,
668682
InputIterator last,
669683
OutputIterator result,
670684
command_queue &queue,
685+
const wait_list &events,
671686
typename boost::enable_if<
672687
mpl::and_<
673688
is_device_iterator<InputIterator>,
@@ -691,7 +706,8 @@ dispatch_copy(InputIterator first,
691706
result.get_buffer(),
692707
first.get_index() * sizeof(value_type),
693708
result.get_index() * sizeof(value_type),
694-
static_cast<size_t>(n) * sizeof(value_type));
709+
static_cast<size_t>(n) * sizeof(value_type),
710+
events);
695711
return result + n;
696712
}
697713

@@ -702,6 +718,7 @@ dispatch_copy_async(InputIterator first,
702718
InputIterator last,
703719
OutputIterator result,
704720
command_queue &queue,
721+
const wait_list &events,
705722
typename boost::enable_if<
706723
mpl::and_<
707724
is_device_iterator<InputIterator>,
@@ -714,7 +731,7 @@ dispatch_copy_async(InputIterator first,
714731
>
715732
>::type* = 0)
716733
{
717-
return copy_on_device_async(first, last, result, queue);
734+
return copy_on_device_async(first, last, result, queue, events);
718735
}
719736

720737
// device -> device (async, specialization for buffer iterators)
@@ -724,6 +741,7 @@ dispatch_copy_async(InputIterator first,
724741
InputIterator last,
725742
OutputIterator result,
726743
command_queue &queue,
744+
const wait_list &events,
727745
typename boost::enable_if<
728746
mpl::and_<
729747
is_device_iterator<InputIterator>,
@@ -749,7 +767,8 @@ dispatch_copy_async(InputIterator first,
749767
result.get_buffer(),
750768
first.get_index() * sizeof(value_type),
751769
result.get_index() * sizeof(value_type),
752-
static_cast<size_t>(n) * sizeof(value_type)
770+
static_cast<size_t>(n) * sizeof(value_type),
771+
events
753772
);
754773

755774
return make_future(result + n, event_);
@@ -762,12 +781,14 @@ dispatch_copy(InputIterator first,
762781
InputIterator last,
763782
OutputIterator result,
764783
command_queue &queue,
784+
const wait_list &events,
765785
typename boost::enable_if_c<
766786
!is_device_iterator<InputIterator>::value &&
767787
!is_device_iterator<OutputIterator>::value
768788
>::type* = 0)
769789
{
770790
(void) queue;
791+
(void) events;
771792

772793
return std::copy(first, last, result);
773794
}
@@ -833,9 +854,10 @@ template<class InputIterator, class OutputIterator>
833854
inline OutputIterator copy(InputIterator first,
834855
InputIterator last,
835856
OutputIterator result,
836-
command_queue &queue = system::default_queue())
857+
command_queue &queue = system::default_queue(),
858+
const wait_list &events = wait_list())
837859
{
838-
return detail::dispatch_copy(first, last, result, queue);
860+
return detail::dispatch_copy(first, last, result, queue, events);
839861
}
840862

841863
/// Copies the values in the range [\p first, \p last) to the range
@@ -847,9 +869,10 @@ inline future<OutputIterator>
847869
copy_async(InputIterator first,
848870
InputIterator last,
849871
OutputIterator result,
850-
command_queue &queue = system::default_queue())
872+
command_queue &queue = system::default_queue(),
873+
const wait_list &events = wait_list())
851874
{
852-
return detail::dispatch_copy_async(first, last, result, queue);
875+
return detail::dispatch_copy_async(first, last, result, queue, events);
853876
}
854877

855878
} // end compute namespace

include/boost/compute/algorithm/copy_n.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,16 @@ template<class InputIterator, class Size, class OutputIterator>
3737
inline OutputIterator copy_n(InputIterator first,
3838
Size count,
3939
OutputIterator result,
40-
command_queue &queue = system::default_queue())
40+
command_queue &queue = system::default_queue(),
41+
const wait_list &events = wait_list())
4142
{
4243
typedef typename std::iterator_traits<InputIterator>::difference_type difference_type;
4344

4445
return ::boost::compute::copy(first,
4546
first + static_cast<difference_type>(count),
4647
result,
47-
queue);
48+
queue,
49+
events);
4850
}
4951

5052
} // end compute namespace

0 commit comments

Comments
 (0)