Skip to content

Commit 760462c

Browse files
committed
OpenMP version of the histogram example.
1 parent 47898d9 commit 760462c

File tree

2 files changed

+71
-1
lines changed

2 files changed

+71
-1
lines changed

examples/directives/histogram_c_openacc.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
from kernel_tuner import tune_kernel
66
from kernel_tuner.utils.directives import Code, OpenACC, Cxx, process_directives
7-
from kernel_tuner.observers.observer import BenchmarkObserver
87

98

109
# Naive Python histogram implementation
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#!/usr/bin/env python
2+
"""This is a simple example for tuning C++ OpenMP code with the kernel tuner"""
3+
import numpy as np
4+
5+
from kernel_tuner import tune_kernel
6+
from kernel_tuner.utils.directives import Code, OpenMP, Cxx, process_directives
7+
8+
9+
# Naive Python histogram implementation
10+
def histogram(vector, hist):
    """Count the values of ``vector`` into ``hist`` and return ``hist``.

    Every element of ``vector`` is used as an index into ``hist`` and the
    counter at that index is incremented by one. ``hist`` is updated in
    place; the returned object is the same ``hist`` that was passed in.

    Args:
        vector: Iterable of integer bin indices, each valid for ``hist``.
        hist: Mutable, indexable sequence of counters.

    Returns:
        ``hist`` after all elements of ``vector`` have been counted.
    """
    # Iterate the values directly; no positional index is needed.
    for value in vector:
        hist[value] += 1
    return hist
14+
15+
16+
# C++ source containing the histogram loop annotated with OpenMP pragmas.
# The region between "#pragma tuner start" and "#pragma tuner stop" is what
# gets handed to process_directives further down in this script.
# nteams, nthreads, enable_reduction and enable_atomic are not defined in the
# C++ text itself; they correspond to the keys of tune_params in this script.
code = """
#include <stdlib.h>

#define HIST_SIZE 256
#define VECTOR_SIZE 1000000

#pragma tuner start histogram vector(int*:VECTOR_SIZE) hist(int*:HIST_SIZE)
#if enable_reduction == 1
#pragma omp target teams distribute parallel for num_teams(nteams) num_threads(nthreads) reduction(+:hist[:HIST_SIZE])
#else
#pragma omp target teams distribute parallel for num_teams(nteams) num_threads(nthreads)
#endif
for ( int i = 0; i < VECTOR_SIZE; i++ ) {
#if enable_atomic == 1
#pragma omp atomic update
#endif
    hist[vector[i]] += 1;
}
#pragma tuner stop
"""
36+
37+
# Describe the annotated source (OpenMP directives in C++ code), then let
# process_directives pull the tunable region(s) out of `code`. The two
# results are indexed by kernel name ("histogram") later in this script.
app = Code(OpenMP(), Cxx())
kernel_string, kernel_args = process_directives(app, code)
40+
41+
# Tunable parameters and the values each of them may take.
tune_params = {
    "nteams": [2**i for i in range(1, 11)],  # 2, 4, ..., 1024
    "nthreads": [32 * i for i in range(1, 33)],  # 32, 64, ..., 1024
    "enable_reduction": [0, 1],
    "enable_atomic": [0, 1],
}

# Exactly one of the two strategies (reduction clause or atomic update) may be
# enabled in a configuration; both-on and both-off are excluded.
constraints = ["enable_reduction != enable_atomic"]

# User-defined metric. The numerator is (2 * 4 + 4) * len(vector), i.e. 12
# bytes counted per input element; it is divided by x["time"] / 10**3 and
# then by 10**9.
# NOTE(review): presumably x["time"] is in milliseconds, which would make the
# result gigabytes per second -- confirm against the tuner's documentation.
# kernel_args is looked up when the metric is evaluated, not when it is defined.
metrics = {
    "GB/s": lambda x: (
        ((2 * 4 * len(kernel_args["histogram"][0])) + (4 * len(kernel_args["histogram"][0])))
        / (x["time"] / 10**3)
        / 10**9
    ),
}
53+
54+
# Replace the extracted arguments with concrete int32 data of the same
# lengths: random bin indices in [0, 256) for the input vector and an
# all-zero histogram. The zero array is allocated as int32 directly rather
# than created as float64 and cast afterwards.
kernel_args["histogram"][0] = np.random.randint(0, 256, len(kernel_args["histogram"][0]), dtype=np.int32)
kernel_args["histogram"][1] = np.zeros(len(kernel_args["histogram"][1]), dtype=np.int32)

# Reference result from the naive Python implementation; zeros_like keeps the
# int32 dtype of the histogram argument.
reference_hist = histogram(kernel_args["histogram"][0], np.zeros_like(kernel_args["histogram"][1]))

# One entry per kernel argument.
# NOTE(review): None presumably means "do not verify this argument" (the
# input vector); only the histogram is compared -- confirm in the tuner docs.
answer = [None, reference_hist]

tune_kernel(
    "histogram",
    kernel_string["histogram"],
    0,  # NOTE(review): problem size; presumably unused for directive-based kernels -- confirm
    kernel_args["histogram"],
    tune_params,
    restrictions=constraints,
    metrics=metrics,
    answer=answer,
    compiler="nvc++",
    compiler_options=["-fast", "-mp=gpu"],
)

0 commit comments

Comments
 (0)