@@ -103,6 +103,7 @@ dispatch_copy_async(InputIterator first,
103
103
InputIterator last,
104
104
OutputIterator result,
105
105
command_queue &queue,
106
+ const wait_list &events,
106
107
typename boost::enable_if<
107
108
mpl::and_<
108
109
mpl::not_<
@@ -118,7 +119,7 @@ dispatch_copy_async(InputIterator first,
118
119
" copy_async() is only supported for contiguous host iterators"
119
120
);
120
121
121
- return copy_to_device_async (first, last, result, queue);
122
+ return copy_to_device_async (first, last, result, queue, events );
122
123
}
123
124
124
125
// host -> device (async)
@@ -129,6 +130,7 @@ dispatch_copy_async(InputIterator first,
129
130
InputIterator last,
130
131
OutputIterator result,
131
132
command_queue &queue,
133
+ const wait_list &events,
132
134
typename boost::enable_if<
133
135
mpl::and_<
134
136
mpl::not_<
@@ -167,7 +169,7 @@ dispatch_copy_async(InputIterator first,
167
169
context
168
170
);
169
171
return copy_on_device_async (
170
- mapped_host.begin (), mapped_host.end (), result, queue
172
+ mapped_host.begin (), mapped_host.end (), result, queue, events
171
173
);
172
174
}
173
175
@@ -179,6 +181,7 @@ dispatch_copy(InputIterator first,
179
181
InputIterator last,
180
182
OutputIterator result,
181
183
command_queue &queue,
184
+ const wait_list &events,
182
185
typename boost::enable_if<
183
186
mpl::and_<
184
187
mpl::not_<
@@ -190,7 +193,7 @@ dispatch_copy(InputIterator first,
190
193
>
191
194
>::type* = 0 )
192
195
{
193
- return copy_to_device (first, last, result, queue);
196
+ return copy_to_device (first, last, result, queue, events );
194
197
}
195
198
196
199
// host -> device
@@ -202,6 +205,7 @@ dispatch_copy(InputIterator first,
202
205
InputIterator last,
203
206
OutputIterator result,
204
207
command_queue &queue,
208
+ const wait_list &events,
205
209
typename boost::enable_if<
206
210
mpl::and_<
207
211
mpl::not_<
@@ -258,13 +262,15 @@ dispatch_copy(InputIterator first,
258
262
259
263
// [0; map_copy_threshold) -> copy_to_device_map()
260
264
if (input_size_bytes < map_copy_threshold) {
261
- return copy_to_device_map (first, last, result, queue);
265
+ return copy_to_device_map (first, last, result, queue, events );
262
266
}
263
267
// [map_copy_threshold; direct_copy_threshold) -> convert [first; last)
264
268
// on host and then perform copy_to_device()
265
269
else if (input_size_bytes < direct_copy_threshold) {
266
270
std::vector<output_type> vector (first, last);
267
- return copy_to_device (vector.begin (), vector.end (), result, queue);
271
+ return copy_to_device (
272
+ vector.begin (), vector.end (), result, queue, events
273
+ );
268
274
}
269
275
270
276
// [direct_copy_threshold; inf) -> map [first; last) to device and
@@ -275,7 +281,7 @@ dispatch_copy(InputIterator first,
275
281
// return the result.
276
282
// At this point we are sure that count > 1 (first != last), so event
277
283
// returned by dispatch_copy_async() must be valid.
278
- return dispatch_copy_async (first, last, result, queue).get ();
284
+ return dispatch_copy_async (first, last, result, queue, events ).get ();
279
285
}
280
286
281
287
// host -> device
@@ -286,6 +292,7 @@ dispatch_copy(InputIterator first,
286
292
InputIterator last,
287
293
OutputIterator result,
288
294
command_queue &queue,
295
+ const wait_list &events,
289
296
typename boost::enable_if<
290
297
mpl::and_<
291
298
mpl::not_<
@@ -345,12 +352,12 @@ dispatch_copy(InputIterator first,
345
352
// copy_to_device_map() is used for every input
346
353
if (input_size_bytes < map_copy_threshold
347
354
|| direct_copy_threshold <= map_copy_threshold) {
348
- return copy_to_device_map (first, last, result, queue);
355
+ return copy_to_device_map (first, last, result, queue, events );
349
356
}
350
357
// [map_copy_threshold; inf) -> convert [first; last)
351
358
// on host and then perform copy_to_device()
352
359
std::vector<output_type> vector (first, last);
353
- return copy_to_device (vector.begin (), vector.end (), result, queue);
360
+ return copy_to_device (vector.begin (), vector.end (), result, queue, events );
354
361
}
355
362
356
363
// device -> host (async)
@@ -360,6 +367,7 @@ dispatch_copy_async(InputIterator first,
360
367
InputIterator last,
361
368
OutputIterator result,
362
369
command_queue &queue,
370
+ const wait_list &events,
363
371
typename boost::enable_if<
364
372
mpl::and_<
365
373
is_device_iterator<InputIterator>,
@@ -375,7 +383,7 @@ dispatch_copy_async(InputIterator first,
375
383
" copy_async() is only supported for contiguous host iterators"
376
384
);
377
385
378
- return copy_to_host_async (first, last, result, queue);
386
+ return copy_to_host_async (first, last, result, queue, events );
379
387
}
380
388
381
389
// device -> host (async)
@@ -386,6 +394,7 @@ dispatch_copy_async(InputIterator first,
386
394
InputIterator last,
387
395
OutputIterator result,
388
396
command_queue &queue,
397
+ const wait_list &events,
389
398
typename boost::enable_if<
390
399
mpl::and_<
391
400
is_device_iterator<InputIterator>,
@@ -426,7 +435,8 @@ dispatch_copy_async(InputIterator first,
426
435
first,
427
436
last,
428
437
make_buffer_iterator<output_type>(mapped_host),
429
- queue
438
+ queue,
439
+ events
430
440
);
431
441
// update host memory asynchronously by mapping and unmapping memory
432
442
event map_event;
@@ -451,6 +461,7 @@ dispatch_copy(InputIterator first,
451
461
InputIterator last,
452
462
OutputIterator result,
453
463
command_queue &queue,
464
+ const wait_list &events,
454
465
typename boost::enable_if<
455
466
mpl::and_<
456
467
is_device_iterator<InputIterator>,
@@ -465,7 +476,7 @@ dispatch_copy(InputIterator first,
465
476
>
466
477
>::type* = 0 )
467
478
{
468
- return copy_to_host (first, last, result, queue);
479
+ return copy_to_host (first, last, result, queue, events );
469
480
}
470
481
471
482
// device -> host
@@ -478,6 +489,7 @@ dispatch_copy(InputIterator first,
478
489
InputIterator last,
479
490
OutputIterator result,
480
491
command_queue &queue,
492
+ const wait_list &events,
481
493
typename boost::enable_if<
482
494
mpl::and_<
483
495
is_device_iterator<InputIterator>,
@@ -540,12 +552,12 @@ dispatch_copy(InputIterator first,
540
552
// copy_to_host_map() is used for every input
541
553
if (input_size_bytes < map_copy_threshold
542
554
|| direct_copy_threshold <= map_copy_threshold) {
543
- return copy_to_host_map (first, last, result, queue);
555
+ return copy_to_host_map (first, last, result, queue, events );
544
556
}
545
557
// [map_copy_threshold; inf) -> copy [first;last) to temporary vector
546
558
// then copy (and convert) to result using std::copy()
547
559
std::vector<input_type> vector (count);
548
- copy_to_host (first, last, vector.begin (), queue);
560
+ copy_to_host (first, last, vector.begin (), queue, events );
549
561
return std::copy (vector.begin (), vector.end (), result);
550
562
}
551
563
@@ -559,6 +571,7 @@ dispatch_copy(InputIterator first,
559
571
InputIterator last,
560
572
OutputIterator result,
561
573
command_queue &queue,
574
+ const wait_list &events,
562
575
typename boost::enable_if<
563
576
mpl::and_<
564
577
is_device_iterator<InputIterator>,
@@ -618,13 +631,13 @@ dispatch_copy(InputIterator first,
618
631
619
632
// [0; map_copy_threshold) -> copy_to_host_map()
620
633
if (input_size_bytes < map_copy_threshold) {
621
- return copy_to_host_map (first, last, result, queue);
634
+ return copy_to_host_map (first, last, result, queue, events );
622
635
}
623
636
// [map_copy_threshold; direct_copy_threshold) -> copy [first;last) to
624
637
// temporary vector then copy (and convert) to result using std::copy()
625
638
else if (input_size_bytes < direct_copy_threshold) {
626
639
std::vector<input_type> vector (count);
627
- copy_to_host (first, last, vector.begin (), queue);
640
+ copy_to_host (first, last, vector.begin (), queue, events );
628
641
return std::copy (vector.begin (), vector.end (), result);
629
642
}
630
643
@@ -636,7 +649,7 @@ dispatch_copy(InputIterator first,
636
649
// return the result.
637
650
// At this point we are sure that count > 1 (first != last), so event
638
651
// returned by dispatch_copy_async() must be valid.
639
- return dispatch_copy_async (first, last, result, queue).get ();
652
+ return dispatch_copy_async (first, last, result, queue, events ).get ();
640
653
}
641
654
642
655
// device -> device
@@ -646,6 +659,7 @@ dispatch_copy(InputIterator first,
646
659
InputIterator last,
647
660
OutputIterator result,
648
661
command_queue &queue,
662
+ const wait_list &events,
649
663
typename boost::enable_if<
650
664
mpl::and_<
651
665
is_device_iterator<InputIterator>,
@@ -658,7 +672,7 @@ dispatch_copy(InputIterator first,
658
672
>
659
673
>::type* = 0 )
660
674
{
661
- return copy_on_device (first, last, result, queue);
675
+ return copy_on_device (first, last, result, queue, events );
662
676
}
663
677
664
678
// device -> device (specialization for buffer iterators)
@@ -668,6 +682,7 @@ dispatch_copy(InputIterator first,
668
682
InputIterator last,
669
683
OutputIterator result,
670
684
command_queue &queue,
685
+ const wait_list &events,
671
686
typename boost::enable_if<
672
687
mpl::and_<
673
688
is_device_iterator<InputIterator>,
@@ -691,7 +706,8 @@ dispatch_copy(InputIterator first,
691
706
result.get_buffer (),
692
707
first.get_index () * sizeof (value_type),
693
708
result.get_index () * sizeof (value_type),
694
- static_cast <size_t >(n) * sizeof (value_type));
709
+ static_cast <size_t >(n) * sizeof (value_type),
710
+ events);
695
711
return result + n;
696
712
}
697
713
@@ -702,6 +718,7 @@ dispatch_copy_async(InputIterator first,
702
718
InputIterator last,
703
719
OutputIterator result,
704
720
command_queue &queue,
721
+ const wait_list &events,
705
722
typename boost::enable_if<
706
723
mpl::and_<
707
724
is_device_iterator<InputIterator>,
@@ -714,7 +731,7 @@ dispatch_copy_async(InputIterator first,
714
731
>
715
732
>::type* = 0 )
716
733
{
717
- return copy_on_device_async (first, last, result, queue);
734
+ return copy_on_device_async (first, last, result, queue, events );
718
735
}
719
736
720
737
// device -> device (async, specialization for buffer iterators)
@@ -724,6 +741,7 @@ dispatch_copy_async(InputIterator first,
724
741
InputIterator last,
725
742
OutputIterator result,
726
743
command_queue &queue,
744
+ const wait_list &events,
727
745
typename boost::enable_if<
728
746
mpl::and_<
729
747
is_device_iterator<InputIterator>,
@@ -749,7 +767,8 @@ dispatch_copy_async(InputIterator first,
749
767
result.get_buffer (),
750
768
first.get_index () * sizeof (value_type),
751
769
result.get_index () * sizeof (value_type),
752
- static_cast <size_t >(n) * sizeof (value_type)
770
+ static_cast <size_t >(n) * sizeof (value_type),
771
+ events
753
772
);
754
773
755
774
return make_future (result + n, event_);
@@ -762,12 +781,14 @@ dispatch_copy(InputIterator first,
762
781
InputIterator last,
763
782
OutputIterator result,
764
783
command_queue &queue,
784
+ const wait_list &events,
765
785
typename boost::enable_if_c<
766
786
!is_device_iterator<InputIterator>::value &&
767
787
!is_device_iterator<OutputIterator>::value
768
788
>::type* = 0 )
769
789
{
770
790
(void ) queue;
791
+ (void ) events;
771
792
772
793
return std::copy (first, last, result);
773
794
}
@@ -833,9 +854,10 @@ template<class InputIterator, class OutputIterator>
833
854
inline OutputIterator copy (InputIterator first,
834
855
InputIterator last,
835
856
OutputIterator result,
836
- command_queue &queue = system::default_queue())
857
+ command_queue &queue = system::default_queue(),
858
+ const wait_list &events = wait_list())
837
859
{
838
- return detail::dispatch_copy (first, last, result, queue);
860
+ return detail::dispatch_copy (first, last, result, queue, events );
839
861
}
840
862
841
863
/// Copies the values in the range [\p first, \p last) to the range
@@ -847,9 +869,10 @@ inline future<OutputIterator>
847
869
copy_async (InputIterator first,
848
870
InputIterator last,
849
871
OutputIterator result,
850
- command_queue &queue = system::default_queue())
872
+ command_queue &queue = system::default_queue(),
873
+ const wait_list &events = wait_list())
851
874
{
852
- return detail::dispatch_copy_async (first, last, result, queue);
875
+ return detail::dispatch_copy_async (first, last, result, queue, events );
853
876
}
854
877
855
878
} // end compute namespace
0 commit comments