Skip to content

Commit 26914be

Browse files
committed
Merge with recent additions to searchspace_experiments
2 parents c1c3a71 + 01e1007 commit 26914be

File tree

12 files changed

+494
-363
lines changed

12 files changed

+494
-363
lines changed

INSTALL.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ Or you could install Kernel Tuner and PyOpenCL together if you haven't done so a
125125
If this fails, please see the PyOpenCL installation guide (https://wiki.tiker.net/PyOpenCL/Installation)
126126

127127
HIP and HIP Python
128-
-------------
128+
------------------
129129

130130
Before we can install HIP Python, you'll need to have the HIP runtime and compiler installed on your system.
131131
The HIP compiler is included as part of the ROCm software stack. Here is AMD's installation guide:

doc/requirements.txt

Lines changed: 92 additions & 87 deletions
Large diffs are not rendered by default.

doc/requirements_test.txt

Lines changed: 272 additions & 246 deletions
Large diffs are not rendered by default.

doc/source/architecture.png

10.6 KB
Loading

doc/source/design.rst

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,8 @@ kernel_tuner.runners.sequential.SimulationRunner
8989
:members:
9090

9191

92-
Device Interfaces
93-
-----------------
94-
95-
kernel_tuner.core.DeviceInterface
96-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
97-
.. autoclass:: kernel_tuner.core.DeviceInterface
98-
:special-members: __init__
99-
:members:
92+
Backends
93+
--------
10094

10195
kernel_tuner.backends.pycuda.PyCudaFunctions
10296
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

kernel_tuner/backends/hip.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
104104

105105
def ready_argument_list(self, arguments):
106106
"""Ready argument list to be passed to the HIP function.
107+
107108
:param arguments: List of arguments to be passed to the HIP function.
108109
The order should match the argument list on the HIP function.
109110
Allowed values are np.ndarray, and/or np.int32, np.float32, and so on.

kernel_tuner/interface.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -598,8 +598,8 @@ def tune_kernel(
598598
# check for forbidden names in tune parameters
599599
util.check_tune_params_list(tune_params, observers, simulation_mode=simulation_mode)
600600

601-
# check whether block_size_names are used as expected
602-
util.check_block_size_params_names_list(block_size_names, tune_params)
601+
# check whether block_size_names are used
602+
block_size_names = util.check_block_size_params_names_list(block_size_names, tune_params)
603603

604604
# ensure there is always at least three names
605605
util.append_default_block_size_names(block_size_names)
@@ -691,8 +691,17 @@ def preprocess_cache(filepath):
691691
if verbose:
692692
print(f"Searchspace has {searchspace.size} configurations after restrictions.")
693693

694-
# call the strategy to execute the tuning process
694+
# register the times and raise an exception if the budget is exceeded
695+
if "time_limit" in tuning_options:
696+
tuning_options["startup_time"] = perf_counter() - start_overhead_time
697+
if tuning_options["startup_time"] > tuning_options["time_limit"]:
698+
raise RuntimeError(
699+
f"The startup time of the tuning process ({tuning_options['startup_time']} seconds) has exceeded the time limit ({tuning_options['time_limit']} seconds). "
700+
"Please increase the time limit or decrease the size of the search space."
701+
)
695702
tuning_options["start_time"] = perf_counter()
703+
704+
# call the strategy to execute the tuning process
696705
results = strategy.tune(searchspace, runner, tuning_options)
697706
env = runner.get_environment(tuning_options)
698707

kernel_tuner/observers/powersensor.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
class PowerSensorObserver(BenchmarkObserver):
1313
"""Observer that an external PowerSensor2 device to accurately measure power
1414
15-
Requires PowerSensor2 hardware and powersensor Python bindings.
15+
Requires PowerSensor3 hardware and powersensor Python bindings.
1616
1717
:param observables: A list of string, containing any of "ps_energy" or "ps_power".
1818
To measure energy in Joules or power consumption in Watt.
@@ -51,9 +51,8 @@ def after_finish(self):
5151
) # Joules
5252
self.results["ps_energy"].append(ps_measured_e)
5353
if "ps_power" in self.observables:
54-
ps_measured_t = (
55-
end_state.time_at_read - self.begin_state.time_at_read
56-
) # seconds
54+
ps_measured_t = ((end_state.time_at_read - self.begin_state.time_at_read).microseconds / 1e6) # Seconds
55+
5756
self.results["ps_power"].append(ps_measured_e / ps_measured_t) # Watt
5857

5958
def get_results(self):

kernel_tuner/util.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -194,8 +194,8 @@ def check_stop_criterion(to):
194194
"""Checks if max_fevals is reached or time limit is exceeded."""
195195
if "max_fevals" in to and len(to.unique_results) >= to.max_fevals:
196196
raise StopCriterionReached(f"max_fevals reached ({len(to.unique_results)} >= {to.max_fevals})")
197-
if "time_limit" in to and (((time.perf_counter() - to.start_time) + (to.simulated_time * 1e-3)) > to.time_limit):
198-
raise StopCriterionReached(f"time limit ({to.time_limit}) exceeded")
197+
if "time_limit" in to and (((time.perf_counter() - to.start_time) + (to.simulated_time * 1e-3) + to.startup_time) > to.time_limit):
198+
raise StopCriterionReached("time limit exceeded")
199199

200200

201201
def check_tune_params_list(tune_params, observers, simulation_mode=False):
@@ -236,11 +236,22 @@ def check_block_size_params_names_list(block_size_names, tune_params):
236236
"Block size name " + name + " is not specified in the tunable parameters list!", UserWarning
237237
)
238238
else: # if default block size names are used
239-
if not any([k in default_block_size_names for k in tune_params.keys()]):
239+
if not any([k.lower() in default_block_size_names for k in tune_params.keys()]):
240240
warnings.warn(
241241
"None of the tunable parameters specify thread block dimensions!",
242242
UserWarning,
243243
)
244+
else:
245+
# check for alternative case spellings of defaults, such as BLOCK_SIZE_X or block_Size_X, etc.
246+
result = []
247+
for k in tune_params.keys():
248+
if k.lower() in default_block_size_names and k not in default_block_size_names:
249+
result.append(k)
250+
# ensure order of block_size_names is correct regardless of case used
251+
block_size_names = sorted(result, key=str.casefold)
252+
253+
return block_size_names
254+
244255

245256

246257
def check_restriction(restrict, params: dict) -> bool:

pyproject.toml

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
55
[project]
66
name = "kernel_tuner"
77
description = "An easy to use CUDA/OpenCL kernel tuner in Python"
8-
version = "1.2.0" # adhere to PEP440 versioning: https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#id55
8+
version = "1.3.0" # adhere to PEP440 versioning: https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#id55
99
readme = "README.md"
1010
license = "Apache-2.0"
1111
authors = [
@@ -48,13 +48,13 @@ classifiers = [
4848
]
4949

5050
# ATTENTION: if anything is changed here, run `poetry update`
51-
requires-python = ">=3.10,<4" # <4 is because of hip-python-fork # NOTE when changing the Python versions, also change the test versions in the Noxfile and GitHub Actions
51+
requires-python = ">=3.10,<4" # <4 is because of hip-python # NOTE when changing the Python versions, also change the test versions in the Noxfile and GitHub Actions
5252
dependencies = [
5353
"numpy (>=1.26.0,<2.0.0)", # Python 3.12 requires numpy at least 1.26, CuPy does not support 2.0
5454
"scipy>=1.14.1",
5555
"packaging", # required by file_utils
5656
"jsonschema",
57-
"python-constraint2>=2.2.0",
57+
"python-constraint2>=2.2.2",
5858
"xmltodict",
5959
"pandas>=2.0.0",
6060
"scikit-learn>=1.0.2",
@@ -83,11 +83,20 @@ include = [
8383
# cupy-cuda11x = { version = "*", optional = true } # NOTE: these are completely optional dependencies as described in CONTRIBUTING.rst
8484
# cupy-cuda12x = { version = "*", optional = true }
8585
# cuda-python = { version = "*", optional = true }
86+
87+
[[tool.poetry.source]]
88+
name = "testpypi"
89+
url = "https://test.pypi.org/simple/"
90+
priority = "explicit"
91+
92+
[tool.poetry.dependencies]
93+
hip-python = { version = "^6.3.3.540.31", source = "testpypi", optional = true } # Note: when released, switch this package to pypi and remove tool.poetry.source and move this to [project.optional-dependencies]
94+
8695
[project.optional-dependencies]
8796
cuda = ["pycuda>=2025.1", "nvidia-ml-py>=12.535.108", "pynvml>=11.4.1"] # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile
8897
opencl = ["pyopencl"] # Attention: if pyopencl is changed here, also change `session.install("pyopencl")` in the Noxfile
8998
cuda_opencl = ["pycuda>=2024.1", "pyopencl"] # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile
90-
hip = ["hip-python-fork"]
99+
hip = ["hip-python"]
91100
tutorial = ["jupyter>=1.0.0", "matplotlib>=3.5.0", "nvidia-ml-py>=12.535.108"]
92101

93102
# ATTENTION: if anything is changed here, run `poetry update` and `poetry export --with docs --without-hashes --format=requirements.txt --output doc/requirements.txt`

0 commit comments

Comments
 (0)