@@ -21,10 +21,16 @@ def histogram(vector, hist):
21
21
#define VECTOR_SIZE 1000000
22
22
23
23
#pragma tuner start histogram vector(int*:VECTOR_SIZE) hist(int*:HIST_SIZE)
24
+ #if enable_reduction == 1
25
+ #pragma acc parallel num_gangs(ngangs) vector_length(nthreads) reduction(+:hist[:HIST_SIZE])
26
+ #else
24
27
#pragma acc parallel num_gangs(ngangs) vector_length(nthreads)
28
+ #endif
25
29
#pragma acc loop independent
26
30
for ( int i = 0; i < VECTOR_SIZE; i++ ) {
31
+ #if enable_atomic == 1
27
32
#pragma acc atomic update
33
+ #endif
28
34
hist[vector[i]] += 1;
29
35
}
30
36
#pragma tuner stop
@@ -37,6 +43,9 @@ def histogram(vector, hist):
37
43
# Search space for the tuner: each key is a tunable parameter name and each
# value is the list of candidate settings for it.
tune_params = dict ()
38
44
# Candidate gang counts: powers of two from 2 (2**1) to 1024 (2**10);
# substituted into the kernel's num_gangs(ngangs) clause.
tune_params ["ngangs" ] = [2 ** i for i in range (1 , 11 )]
39
45
# Candidate vector lengths: multiples of 32 from 32 to 1024;
# substituted into the kernel's vector_length(nthreads) clause.
tune_params ["nthreads" ] = [32 * i for i in range (1 , 33 )]
46
# 0/1 switches tested by the kernel's `#if enable_reduction == 1` and
# `#if enable_atomic == 1` preprocessor guards.
+ tune_params ["enable_reduction" ] = [0 , 1 ]
47
+ tune_params ["enable_atomic" ] = [0 , 1 ]
48
# The two switches must differ, so with values {0, 1} exactly one of them is 1
# in every configuration that satisfies this expression.
+ constraints = ["enable_reduction != enable_atomic" ]
40
49
metrics = dict ()
41
50
metrics ["GB/s" ] = (
42
51
lambda x : ((2 * 4 * len (kernel_args ["histogram" ][0 ])) + (4 * len (kernel_args ["histogram" ][0 ])))
@@ -56,6 +65,7 @@ def histogram(vector, hist):
56
65
0 ,
57
66
kernel_args ["histogram" ],
58
67
tune_params ,
68
+ restrictions = constraints ,
59
69
metrics = metrics ,
60
70
answer = answer ,
61
71
compiler = "nvc++" ,
0 commit comments