Skip to content

Commit 7484e75

Browse files
committed
Merge branch 'master' into directives
2 parents 7325006 + ece0719 commit 7484e75

File tree

11 files changed

+42
-30
lines changed

11 files changed

+42
-30
lines changed

INSTALL.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ Or you could install Kernel Tuner and PyOpenCL together if you haven't done so a
125125
If this fails, please see the PyOpenCL installation guide (https://wiki.tiker.net/PyOpenCL/Installation)
126126

127127
HIP and HIP Python
128-
-------------
128+
------------------
129129

130130
Before we can install HIP Python, you'll need to have the HIP runtime and compiler installed on your system.
131131
The HIP compiler is included as part of the ROCm software stack. Here is AMD's installation guide:

doc/source/architecture.png

10.6 KB
Loading

doc/source/design.rst

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,8 @@ kernel_tuner.runners.sequential.SimulationRunner
8989
:members:
9090

9191

92-
Device Interfaces
93-
-----------------
94-
95-
kernel_tuner.core.DeviceInterface
96-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
97-
.. autoclass:: kernel_tuner.core.DeviceInterface
98-
:special-members: __init__
99-
:members:
92+
Backends
93+
--------
10094

10195
kernel_tuner.backends.pycuda.PyCudaFunctions
10296
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

kernel_tuner/backends/hip.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
104104

105105
def ready_argument_list(self, arguments):
106106
"""Ready argument list to be passed to the HIP function.
107+
107108
:param arguments: List of arguments to be passed to the HIP function.
108109
The order should match the argument list on the HIP function.
109110
Allowed values are np.ndarray, and/or np.int32, np.float32, and so on.

kernel_tuner/interface.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -592,8 +592,8 @@ def tune_kernel(
592592
# check for forbidden names in tune parameters
593593
util.check_tune_params_list(tune_params, observers, simulation_mode=simulation_mode)
594594

595-
# check whether block_size_names are used as expected
596-
util.check_block_size_params_names_list(block_size_names, tune_params)
595+
# check whether block_size_names are used
596+
block_size_names = util.check_block_size_params_names_list(block_size_names, tune_params)
597597

598598
# ensure there is always at least three names
599599
util.append_default_block_size_names(block_size_names)

kernel_tuner/observers/powersensor.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
class PowerSensorObserver(BenchmarkObserver):
1313
"""Observer that uses an external PowerSensor2 device to accurately measure power
1414
15-
Requires PowerSensor2 hardware and powersensor Python bindings.
15+
Requires PowerSensor3 hardware and powersensor Python bindings.
1616
1717
:param observables: A list of string, containing any of "ps_energy" or "ps_power".
1818
To measure energy in Joules or power consumption in Watt.
@@ -51,9 +51,8 @@ def after_finish(self):
5151
) # Joules
5252
self.results["ps_energy"].append(ps_measured_e)
5353
if "ps_power" in self.observables:
54-
ps_measured_t = (
55-
end_state.time_at_read - self.begin_state.time_at_read
56-
) # seconds
54+
ps_measured_t = ((end_state.time_at_read - self.begin_state.time_at_read).microseconds / 1e6) # Seconds
55+
5756
self.results["ps_power"].append(ps_measured_e / ps_measured_t) # Watt
5857

5958
def get_results(self):

kernel_tuner/strategies/bayes_opt.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ def get_hyperparam(name: str, default, supported_values=list()):
235235
self.invalid_value = 1e20
236236
self.opt_direction = opt_direction
237237
if opt_direction == "min":
238-
self.worst_value = np.PINF
238+
self.worst_value = np.inf
239239
self.argopt = np.argmin
240240
elif opt_direction == "max":
241241
self.worst_value = np.NINF
@@ -262,7 +262,7 @@ def get_hyperparam(name: str, default, supported_values=list()):
262262
self.__visited_num = 0
263263
self.__visited_valid_num = 0
264264
self.__visited_searchspace_indices = [False] * self.searchspace_size
265-
self.__observations = [np.NaN] * self.searchspace_size
265+
self.__observations = [np.nan] * self.searchspace_size
266266
self.__valid_observation_indices = [False] * self.searchspace_size
267267
self.__valid_params = list()
268268
self.__valid_observations = list()
@@ -311,7 +311,7 @@ def is_not_visited(self, index: int) -> bool:
311311

312312
def is_valid(self, observation: float) -> bool:
313313
"""Returns whether an observation is valid."""
314-
return not (observation is None or observation == self.invalid_value or observation == np.NaN)
314+
return not (observation is None or observation == self.invalid_value or observation == np.nan)
315315

316316
def get_af_by_name(self, name: str):
317317
"""Get the basic acquisition functions by their name."""

kernel_tuner/util.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,11 +235,22 @@ def check_block_size_params_names_list(block_size_names, tune_params):
235235
"Block size name " + name + " is not specified in the tunable parameters list!", UserWarning
236236
)
237237
else: # if default block size names are used
238-
if not any([k in default_block_size_names for k in tune_params.keys()]):
238+
if not any([k.lower() in default_block_size_names for k in tune_params.keys()]):
239239
warnings.warn(
240240
"None of the tunable parameters specify thread block dimensions!",
241241
UserWarning,
242242
)
243+
else:
244+
# check for alternative case spellings of the defaults, such as BLOCK_SIZE_X or block_Size_X, etc.
245+
result = []
246+
for k in tune_params.keys():
247+
if k.lower() in default_block_size_names and k not in default_block_size_names:
248+
result.append(k)
249+
# ensure order of block_size_names is correct regardless of case used
250+
block_size_names = sorted(result, key=str.casefold)
251+
252+
return block_size_names
253+
243254

244255

245256
def check_restriction(restrict, params: dict) -> bool:

pyproject.toml

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ build-backend = "poetry.core.masonry.api"
66
name = "kernel_tuner"
77
packages = [{ include = "kernel_tuner", from = "." }]
88
description = "An easy to use CUDA/OpenCL kernel tuner in Python"
9-
version = "1.0" # adhere to PEP440 versioning: https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#id55
9+
version = "1.1.0" # adhere to PEP440 versioning: https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#id55
1010
license = "Apache-2.0"
1111
authors = [
1212
"Ben van Werkhoven <[email protected]>",
@@ -57,12 +57,17 @@ generate-setup-file = false
5757
[tool.poetry.scripts]
5858
kernel_tuner = "kernel_tuner.interface:entry_point"
5959

60+
[[tool.poetry.source]]
61+
name = "testpypi"
62+
url = "https://test.pypi.org/simple/"
63+
priority = "explicit"
64+
6065
# ATTENTION: if anything is changed here, run `poetry update`
6166
[tool.poetry.dependencies]
62-
python = ">=3.9,<3.15" # NOTE when changing the supported Python versions, also change the test versions in the noxfile
63-
numpy = "^1.26.0" # Python 3.12 requires numpy at least 1.26
64-
scipy = ">=1.11.0" # held back by Python 3.9
65-
packaging = "*" # required by file_utils
67+
python = ">=3.9,<4" # <4 is because of hip-python # NOTE when changing the supported Python versions, also change the test versions in the noxfile
68+
numpy = "^2.0.0"                # bumped to NumPy 2.x; Python 3.12 requires numpy at least 1.26
69+
scipy = ">=1.11.0" # held back by Python 3.9
70+
packaging = "*" # required by file_utils
6671
jsonschema = "*"
6772
python-constraint2 = "^2.1.0"
6873
xmltodict = "*"
@@ -72,6 +77,7 @@ scikit-learn = ">=1.0.2"
7277

7378
# List of optional dependencies for user installation, e.g. `pip install kernel_tuner[cuda]`, used in the below `extras`.
7479
# Please note that this is different from the dependency groups below, e.g. `docs` and `test`, those are for development.
80+
# ATTENTION: if anything is changed here, run `poetry update`
7581
# CUDA
7682
pycuda = { version = "^2024.1", optional = true } # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile
7783
nvidia-ml-py = { version = "^12.535.108", optional = true }
@@ -82,7 +88,7 @@ pynvml = { version = "^11.4.1", optional = true }
8288
# OpenCL
8389
pyopencl = { version = "*", optional = true } # Attention: if pyopencl is changed here, also change `session.install("pyopencl")` in the Noxfile
8490
# HIP
85-
hip-python-fork = { version = "*", optional = true }
91+
hip-python = { version = "^6.3.3.540.31", source = "testpypi", optional = true } # Note: when released, switch this package to pypi and remove tool.poetry.source
8692
# Tutorial (for the notebooks used in the examples)
8793
jupyter = { version = "^1.0.0", optional = true }
8894
matplotlib = { version = "^3.5.0", optional = true }
@@ -91,7 +97,7 @@ matplotlib = { version = "^3.5.0", optional = true }
9197
cuda = ["pycuda", "nvidia-ml-py", "pynvml"]
9298
opencl = ["pyopencl"]
9399
cuda_opencl = ["pycuda", "pyopencl"]
94-
hip = ["hip-python-fork"]
100+
hip = ["hip-python"]
95101
tutorial = ["jupyter", "matplotlib", "nvidia-ml-py"]
96102

97103
# ATTENTION: if anything is changed here, run `poetry update` and `poetry export --with docs --without-hashes --format=requirements.txt --output doc/requirements.txt`
@@ -147,4 +153,4 @@ select = [
147153
"D", # pydocstyle,
148154
]
149155
[tool.ruff.pydocstyle]
150-
convention = "google"
156+
convention = "google"

test/context.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,9 @@
5555

5656
try:
5757
from hip import hip
58+
hip.hipDriverGetVersion()
5859
hip_present = True
59-
except ImportError:
60+
except (ImportError, RuntimeError):
6061
hip_present = False
6162

6263
skip_if_no_pycuda = pytest.mark.skipif(
@@ -78,7 +79,7 @@
7879
)
7980
skip_if_no_openmp = pytest.mark.skipif(not openmp_present, reason="No OpenMP found")
8081
skip_if_no_openacc = pytest.mark.skipif(not openacc_present, reason="No nvc++ on PATH")
81-
skip_if_no_hip = pytest.mark.skipif(not hip_present, reason="No HIP Python found")
82+
skip_if_no_hip = pytest.mark.skipif(not hip_present, reason="No HIP Python found or no HIP device detected")
8283

8384

8485
def skip_backend(backend: str):

0 commit comments

Comments
 (0)