Skip to content

Commit c1acc3b

Browse files
committed
Merge remote-tracking branch 'origin/master' into searchspace_experiments
2 parents bd1b4c3 + 28014d7 commit c1acc3b

File tree

7 files changed

+21
-16
lines changed

7 files changed

+21
-16
lines changed

INSTALL.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ Or you could install Kernel Tuner and PyOpenCL together if you haven't done so a
125125
If this fails, please see the PyOpenCL installation guide (https://wiki.tiker.net/PyOpenCL/Installation)
126126

127127
HIP and HIP Python
128-
-------------
128+
------------------
129129

130130
Before we can install HIP Python, you'll need to have the HIP runtime and compiler installed on your system.
131131
The HIP compiler is included as part of the ROCm software stack. Here is AMD's installation guide:

doc/source/architecture.png

10.6 KB
Loading

doc/source/design.rst

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,8 @@ kernel_tuner.runners.sequential.SimulationRunner
8989
:members:
9090

9191

92-
Device Interfaces
93-
-----------------
94-
95-
kernel_tuner.core.DeviceInterface
96-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
97-
.. autoclass:: kernel_tuner.core.DeviceInterface
98-
:special-members: __init__
99-
:members:
92+
Backends
93+
--------
10094

10195
kernel_tuner.backends.pycuda.PyCudaFunctions
10296
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

kernel_tuner/backends/hip.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
104104

105105
def ready_argument_list(self, arguments):
106106
"""Ready argument list to be passed to the HIP function.
107+
107108
:param arguments: List of arguments to be passed to the HIP function.
108109
The order should match the argument list on the HIP function.
109110
Allowed values are np.ndarray, and/or np.int32, np.float32, and so on.

kernel_tuner/interface.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -597,8 +597,8 @@ def tune_kernel(
597597
# check for forbidden names in tune parameters
598598
util.check_tune_params_list(tune_params, observers, simulation_mode=simulation_mode)
599599

600-
# check whether block_size_names are used as expected
601-
util.check_block_size_params_names_list(block_size_names, tune_params)
600+
# check whether block_size_names are used
601+
block_size_names = util.check_block_size_params_names_list(block_size_names, tune_params)
602602

603603
# ensure there is always at least three names
604604
util.append_default_block_size_names(block_size_names)

kernel_tuner/observers/powersensor.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
class PowerSensorObserver(BenchmarkObserver):
1313
"""Observer that uses an external PowerSensor2 device to accurately measure power
1414
15-
Requires PowerSensor2 hardware and powersensor Python bindings.
15+
Requires PowerSensor3 hardware and powersensor Python bindings.
1616
1717
:param observables: A list of string, containing any of "ps_energy" or "ps_power".
1818
To measure energy in Joules or power consumption in Watt.
@@ -51,9 +51,8 @@ def after_finish(self):
5151
) # Joules
5252
self.results["ps_energy"].append(ps_measured_e)
5353
if "ps_power" in self.observables:
54-
ps_measured_t = (
55-
end_state.time_at_read - self.begin_state.time_at_read
56-
) # seconds
54+
ps_measured_t = ((end_state.time_at_read - self.begin_state.time_at_read).microseconds / 1e6) # Seconds
55+
5756
self.results["ps_power"].append(ps_measured_e / ps_measured_t) # Watt
5857

5958
def get_results(self):

kernel_tuner/util.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,11 +236,22 @@ def check_block_size_params_names_list(block_size_names, tune_params):
236236
"Block size name " + name + " is not specified in the tunable parameters list!", UserWarning
237237
)
238238
else: # if default block size names are used
239-
if not any([k in default_block_size_names for k in tune_params.keys()]):
239+
if not any([k.lower() in default_block_size_names for k in tune_params.keys()]):
240240
warnings.warn(
241241
"None of the tunable parameters specify thread block dimensions!",
242242
UserWarning,
243243
)
244+
else:
245+
# check for alternative case spelling of defaults such as BLOCK_SIZE_X or block_Size_X etc
246+
result = []
247+
for k in tune_params.keys():
248+
if k.lower() in default_block_size_names and k not in default_block_size_names:
249+
result.append(k)
250+
# ensure order of block_size_names is correct regardless of case used
251+
block_size_names = sorted(result, key=str.casefold)
252+
253+
return block_size_names
254+
244255

245256

246257
def check_restriction(restrict, params: dict) -> bool:

0 commit comments

Comments (0)