Skip to content

Commit 61536be

Browse files
merged
2 parents 78c8a9d + b90f5e4 commit 61536be

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+2828
-1178
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
poetry.lock
33
noxenv.txt
44
noxsettings.toml
5+
hyperparamtuning/
6+
*.prof
57

68
### Python ###
79
*.pyc
@@ -16,6 +18,8 @@ push_to_pypi.sh
1618
.nfs*
1719
*.log
1820
*.json
21+
!kernel_tuner/schema/T1/1.0.0/input-schema.json
22+
!test/test_T1_input.json
1923
*.csv
2024
.cache
2125
*.ipynb_checkpoints

CHANGELOG.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,17 @@ All notable changes to this project will be documented in this file.
33
This project adheres to [Semantic Versioning](http://semver.org/).
44

55
## Unreleased
6+
<!-- ## [1.1.0] - 2025 ?? -->
7+
- Additional improvements to search space construction
68
- Changed HIP Python bindings from pyhip-interface to the official hip-python
9+
- Added Python 3.13 and experimental 3.14 support
10+
- Dropped Python 3.8 and 3.9 support (due to incompatibility with newer scipy versions)
711

812
## [1.0.0] - 2024-04-04
913
- HIP backend to support tuning HIP kernels on AMD GPUs
1014
- Experimental features for mixed-precision and accuracy tuning
1115
- Experimental features for OpenACC tuning
12-
- Major speedup due to new parser and using revamped python-constraint for searchspace building
16+
- Major speedup due to new parser and using revamped python-constraint for search space construction
1317
- Implemented ability to use `PySMT` and `ATF` for search space construction
1418
- Added Poetry for dependency and build management
1519
- Switched from `setup.py` and `setup.cfg` to `pyproject.toml` for centralized metadata, added relevant tests

INSTALL.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ Linux users could type the following to download and install Python 3 using Mini
2020
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
2121
bash Miniconda3-latest-Linux-x86_64.sh
2222
23-
You are of course also free to use your own Python installation, and the Kernel Tuner is developed to be fully compatible with Python 3.9 and newer.
23+
You are of course also free to use your own Python installation, and the Kernel Tuner is developed to be fully compatible with Python 3.10 and newer.
2424

2525
Installing Python Packages
2626
--------------------------

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717

1818
Create optimized GPU applications in any mainstream GPU
19-
programming language (CUDA, HIP, OpenCL, OpenACC).
19+
programming language (CUDA, HIP, OpenCL, OpenACC, OpenMP).
2020

2121
What Kernel Tuner does:
2222

doc/requirements.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ pytz==2025.1 ; python_version >= "3.9" and python_version < "3.15"
5757
pywin32==308 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.9" and python_version < "3.15"
5858
pyzmq==26.2.1 ; python_version >= "3.9" and python_version < "3.15"
5959
referencing==0.36.2 ; python_version >= "3.9" and python_version < "3.15"
60-
requests==2.32.3 ; python_version >= "3.9" and python_version < "3.15"
60+
requests==2.32.4 ; python_version >= "3.9" and python_version < "3.15"
6161
rpds-py==0.23.1 ; python_version >= "3.9" and python_version < "3.15"
6262
scikit-learn==1.6.1 ; python_version >= "3.9" and python_version < "3.15"
6363
scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.15"
@@ -78,11 +78,11 @@ stack-data==0.6.3 ; python_version >= "3.9" and python_version < "3.15"
7878
threadpoolctl==3.5.0 ; python_version >= "3.9" and python_version < "3.15"
7979
tinycss2==1.4.0 ; python_version >= "3.9" and python_version < "3.15"
8080
tomli==2.2.1 ; python_version >= "3.9" and python_version < "3.15"
81-
tornado==6.4.2 ; python_version >= "3.9" and python_version < "3.15"
81+
tornado==6.5.1 ; python_version >= "3.9" and python_version < "3.15"
8282
traitlets==5.14.3 ; python_version >= "3.9" and python_version < "3.15"
8383
typing-extensions==4.12.2 ; python_version >= "3.9" and python_version < "3.15"
8484
tzdata==2025.1 ; python_version >= "3.9" and python_version < "3.15"
85-
urllib3==2.3.0 ; python_version >= "3.9" and python_version < "3.15"
85+
urllib3==2.5.0 ; python_version >= "3.9" and python_version < "3.15"
8686
wcwidth==0.2.13 ; python_version >= "3.9" and python_version < "3.15"
8787
webencodings==0.5.1 ; python_version >= "3.9" and python_version < "3.15"
8888
xmltodict==0.14.2 ; python_version >= "3.9" and python_version < "3.15"

doc/requirements_test.txt

Lines changed: 291 additions & 164 deletions
Large diffs are not rendered by default.

doc/source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
# import data from pyproject.toml using https://github.com/sphinx-toolbox/sphinx-pyproject
3030
# additional data can be added with `[tool.sphinx-pyproject]` and retrieved with `config['']`.
3131
config = SphinxConfig(
32-
"../../pyproject.toml", style="poetry"
32+
"../../pyproject.toml",
3333
) # add `, globalns=globals()` to directly insert in namespace
3434
year = time.strftime("%Y")
3535
startyear = "2016"

doc/source/dev-environment.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ Steps with :bash:`sudo` access (e.g. on a local device):
2727
* After installation, restart your shell.
2828
#. Install the required Python versions:
2929
* On some systems, additional packages may be needed to build Python versions. For example on Ubuntu: :bash:`sudo apt install build-essential zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev libssl-dev libreadline-dev libffi-dev libsqlite3-dev wget libbz2-dev liblzma-dev lzma`.
30-
* Install the Python versions with: :bash:`pyenv install 3.9 3.10 3.11 3.12`. The reason we're installing all these versions as opposed to just one, is so we can test against all supported Python versions.
31-
#. Set the Python versions so they can be found: :bash:`pyenv local 3.9 3.10 3.11 3.12` (replace :bash:`local` with :bash:`global` when not using the virtualenv).
30+
* Install the Python versions with: :bash:`pyenv install 3.10 3.11 3.12 3.13`. The reason we're installing all these versions as opposed to just one, is so we can test against all supported Python versions.
31+
#. Set the Python versions so they can be found: :bash:`pyenv local 3.10 3.11 3.12 3.13` (replace :bash:`local` with :bash:`global` when not using the virtualenv).
3232
#. Setup a local virtual environment in the folder: :bash:`pyenv virtualenv 3.11 kerneltuner` (or whatever environment name and Python version you prefer).
3333
#. `Install Poetry <https://python-poetry.org/docs/#installing-with-the-official-installer>`__.
3434
* Use :bash:`curl -sSL https://install.python-poetry.org | python3 -` to install Poetry.

examples/c/vector_add.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
}
2727
"""
2828

29-
size = 72*1024*1024
29+
size = 72 * 1024 * 1024
3030

3131
a = numpy.random.randn(size).astype(numpy.float32)
3232
b = numpy.random.randn(size).astype(numpy.float32)
@@ -39,7 +39,6 @@
3939
tune_params["nthreads"] = [1, 2, 3, 4, 8, 12, 16, 24, 32]
4040
tune_params["vecsize"] = [1, 2, 4, 8, 16]
4141

42-
answer = [a+b, None, None, None]
42+
answer = [a + b, None, None, None]
4343

44-
tune_kernel("vector_add", kernel_string, size, args, tune_params,
45-
answer=answer, compiler_options=['-O3'])
44+
tune_kernel("vector_add", kernel_string, size, args, tune_params, answer=answer, compiler_options=["-fopenmp", "-O3"])
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#!/usr/bin/env python
2+
"""This is a simple example for tuning C++ OpenACC code with the kernel tuner"""
3+
import numpy as np
4+
5+
from kernel_tuner import tune_kernel
6+
from kernel_tuner.utils.directives import Code, OpenACC, Cxx, process_directives
7+
8+
9+
# Naive Python histogram implementation
10+
def histogram(vector, hist):
11+
for i in range(0, len(vector)):
12+
hist[vector[i]] += 1
13+
return hist
14+
15+
16+
code = """
17+
#include <stdlib.h>
18+
19+
#define HIST_SIZE 256
20+
#define VECTOR_SIZE 1000000
21+
22+
#pragma tuner start histogram vector(int*:VECTOR_SIZE) hist(int*:HIST_SIZE)
23+
#if enable_reduction == 1
24+
#pragma acc parallel num_gangs(ngangs) vector_length(nthreads) reduction(+:hist[:HIST_SIZE])
25+
#else
26+
#pragma acc parallel num_gangs(ngangs) vector_length(nthreads)
27+
#endif
28+
#pragma acc loop independent
29+
for ( int i = 0; i < VECTOR_SIZE; i++ ) {
30+
#if enable_atomic == 1
31+
#pragma acc atomic update
32+
#endif
33+
hist[vector[i]] += 1;
34+
}
35+
#pragma tuner stop
36+
"""
37+
38+
# Extract tunable directive
39+
app = Code(OpenACC(), Cxx())
40+
kernel_string, kernel_args = process_directives(app, code)
41+
42+
tune_params = dict()
43+
tune_params["ngangs"] = [2**i for i in range(1, 11)]
44+
tune_params["nthreads"] = [32 * i for i in range(1, 33)]
45+
tune_params["enable_reduction"] = [0, 1]
46+
tune_params["enable_atomic"] = [0, 1]
47+
constraints = ["enable_reduction != enable_atomic"]
48+
metrics = dict()
49+
metrics["GB/s"] = (
50+
lambda x: ((2 * 4 * len(kernel_args["histogram"][0])) + (4 * len(kernel_args["histogram"][0])))
51+
/ (x["time"] / 10**3)
52+
/ 10**9
53+
)
54+
55+
kernel_args["histogram"][0] = np.random.randint(0, 256, len(kernel_args["histogram"][0]), dtype=np.int32)
56+
kernel_args["histogram"][1] = np.zeros(len(kernel_args["histogram"][1])).astype(np.int32)
57+
reference_hist = np.zeros_like(kernel_args["histogram"][1]).astype(np.int32)
58+
reference_hist = histogram(kernel_args["histogram"][0], reference_hist)
59+
answer = [None, reference_hist]
60+
61+
tune_kernel(
62+
"histogram",
63+
kernel_string["histogram"],
64+
0,
65+
kernel_args["histogram"],
66+
tune_params,
67+
restrictions=constraints,
68+
metrics=metrics,
69+
answer=answer,
70+
compiler="nvc++",
71+
compiler_options=["-fast", "-acc=gpu"],
72+
)

0 commit comments

Comments
 (0)