forked from vgvassilev/clad
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathThrustScanByKey.cu
More file actions
108 lines (95 loc) · 6.52 KB
/
ThrustScanByKey.cu
File metadata and controls
108 lines (95 loc) · 6.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
// RUN: %cladclang_cuda -I%S/../../include --cuda-path=%cudapath \
// RUN: --cuda-gpu-arch=%cudaarch %cudaldflags -oThrustScanByKey.out \
// RUN: -Xclang -verify %s 2>&1 | %filecheck %s
//
// RUN: %cudarun ./ThrustScanByKey.out | %filecheck_exec %s
//
// REQUIRES: cuda-runtime
//
// expected-no-diagnostics
#include <iostream>
#include <vector>
#include "clad/Differentiator/Differentiator.h"
#include "clad/Differentiator/ThrustDerivatives.h"
#include "../TestUtils.h"
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/functional.h>
#include <thrust/scan.h>
void inclusive_scan_by_key_plus(const thrust::device_vector<int>& keys,
const thrust::device_vector<double>& values,
thrust::device_vector<double>& output) {
thrust::inclusive_scan_by_key(keys.begin(), keys.end(), values.begin(),
output.begin(), thrust::equal_to<int>(),
thrust::plus<double>());
}
// CHECK: void inclusive_scan_by_key_plus_grad(const thrust::device_vector<int> &keys, const thrust::device_vector<double> &values, thrust::device_vector<double> &output, thrust::device_vector<int> *_d_keys, thrust::device_vector<double> *_d_values, thrust::device_vector<double> *_d_output) {
// CHECK-NEXT: clad::custom_derivatives::thrust::inclusive_scan_by_key_reverse_forw(std::begin(keys), std::end(keys), std::begin(values), std::begin(output), thrust::equal_to<int>(), thrust::plus<double>(), std::begin((*_d_keys)), std::end((*_d_keys)), std::begin((*_d_values)), std::begin((*_d_output)), {}, {});
// CHECK-NEXT: {
// CHECK-NEXT: thrust::detail::vector_base<int, thrust::device_allocator<int> >::const_iterator _r0 = std::begin((*_d_keys));
// CHECK-NEXT: thrust::detail::vector_base<int, thrust::device_allocator<int> >::const_iterator _r1 = std::end((*_d_keys));
// CHECK-NEXT: thrust::detail::vector_base<double, thrust::device_allocator<double> >::const_iterator _r2 = std::begin((*_d_values));
// CHECK-NEXT: thrust::detail::vector_base<double, thrust::device_allocator<double> >::iterator _r3 = std::begin((*_d_output));
// CHECK-NEXT: thrust::equal_to<int> _r4 = {};
// CHECK-NEXT: thrust::plus<double> _r5 = {};
// CHECK-NEXT: clad::custom_derivatives::thrust::inclusive_scan_by_key_pullback(std::begin(keys), std::end(keys), std::begin(values), std::begin(output), thrust::equal_to<int>(), thrust::plus<double>(), {}, &_r0, &_r1, &_r2, &_r3, &_r4, &_r5);
// CHECK-NEXT: }
// CHECK-NEXT: }
void exclusive_scan_by_key_plus(const thrust::device_vector<int>& keys,
const thrust::device_vector<double>& values,
thrust::device_vector<double>& output,
double init) {
thrust::exclusive_scan_by_key(keys.begin(), keys.end(), values.begin(),
output.begin(), init, thrust::equal_to<int>(),
thrust::plus<double>());
}
// CHECK: void exclusive_scan_by_key_plus_grad(const thrust::device_vector<int> &keys, const thrust::device_vector<double> &values, thrust::device_vector<double> &output, double init, thrust::device_vector<int> *_d_keys, thrust::device_vector<double> *_d_values, thrust::device_vector<double> *_d_output, double *_d_init) {
// CHECK-NEXT: clad::custom_derivatives::thrust::exclusive_scan_by_key_reverse_forw(std::begin(keys), std::end(keys), std::begin(values), std::begin(output), init, thrust::equal_to<int>(), thrust::plus<double>(), std::begin((*_d_keys)), std::end((*_d_keys)), std::begin((*_d_values)), std::begin((*_d_output)), 0., {}, {});
// CHECK-NEXT: {
// CHECK-NEXT: thrust::detail::vector_base<int, thrust::device_allocator<int> >::const_iterator _r0 = std::begin((*_d_keys));
// CHECK-NEXT: thrust::detail::vector_base<int, thrust::device_allocator<int> >::const_iterator _r1 = std::end((*_d_keys));
// CHECK-NEXT: thrust::detail::vector_base<double, thrust::device_allocator<double> >::const_iterator _r2 = std::begin((*_d_values));
// CHECK-NEXT: thrust::detail::vector_base<double, thrust::device_allocator<double> >::iterator _r3 = std::begin((*_d_output));
// CHECK-NEXT: double _r4 = 0.;
// CHECK-NEXT: thrust::equal_to<int> _r5 = {};
// CHECK-NEXT: thrust::plus<double> _r6 = {};
// CHECK-NEXT: clad::custom_derivatives::thrust::exclusive_scan_by_key_pullback(std::begin(keys), std::end(keys), std::begin(values), std::begin(output), init, thrust::equal_to<int>(), thrust::plus<double>(), {}, &_r0, &_r1, &_r2, &_r3, &_r4, &_r5, &_r6);
// CHECK-NEXT: *_d_init += _r4;
// CHECK-NEXT: }
// CHECK-NEXT: }
int main() {
std::vector<int> host_keys = {0, 0, 1, 1, 2, 2, 2};
std::vector<double> host_vals = {1, 2, 3, 4, 5, 6, 7};
const size_t n = host_keys.size();
thrust::device_vector<int> d_keys = host_keys;
thrust::device_vector<double> d_vals = host_vals;
thrust::device_vector<double> d_out(n);
thrust::device_vector<double> d_output_adj(n);
thrust::fill(d_output_adj.begin(), d_output_adj.end(), 1.0);
// Inclusive scan by key
INIT_GRADIENT(inclusive_scan_by_key_plus);
thrust::device_vector<int> d_keys_grad(n);
thrust::device_vector<double> d_vals_grad(n);
inclusive_scan_by_key_plus_grad.execute(d_keys, d_vals, d_out, &d_keys_grad,
&d_vals_grad, &d_output_adj);
thrust::host_vector<double> hv_incl = d_vals_grad;
printf("Inclusive ByKey Gradients: ");
for (size_t i = 0; i < n; ++i) printf("%.3f%s", hv_incl[i], i + 1 == n ? "\n" : " ");
// CHECK-EXEC: Inclusive ByKey Gradients: 2.000 1.000 2.000 1.000 3.000 2.000 1.000
// Exclusive scan by key
thrust::fill(d_output_adj.begin(), d_output_adj.end(), 1.0);
INIT_GRADIENT(exclusive_scan_by_key_plus);
thrust::fill(d_vals_grad.begin(), d_vals_grad.end(), 0.0);
double d_init = 1.0;
exclusive_scan_by_key_plus_grad.execute(d_keys, d_vals, d_out, 0.0,
&d_keys_grad, &d_vals_grad,
&d_output_adj, &d_init);
thrust::host_vector<double> hv_excl = d_vals_grad;
printf("Exclusive ByKey Gradients: ");
for (size_t i = 0; i < n; ++i) printf("%.3f%s", hv_excl[i], i + 1 == n ? "\n" : " ");
// CHECK-EXEC: Exclusive ByKey Gradients: 1.000 0.000 1.000 0.000 2.000 1.000 0.000
printf("Exclusive ByKey Init Gradient: %.3f\n", d_init);
// Sum of adjoints (n) + initial seed (1.0) = 8.000
// CHECK-EXEC: Exclusive ByKey Init Gradient: 8.000
return 0;
}