diff --git a/.gitignore b/.gitignore index 43bd95c2b..1f576769a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ poetry.lock noxenv.txt noxsettings.toml +hyperparamtuning*/* +*.prof ### Python ### *.pyc @@ -16,6 +18,9 @@ push_to_pypi.sh .nfs* *.log *.json +!kernel_tuner/schema/T1/1.0.0/input-schema.json +!test/test_T1_input.json +!test_cache_file.json *.csv .cache *.ipynb_checkpoints @@ -23,6 +28,7 @@ examples/cuda/output deploy_key *.mod temp_*.* +.DS_Store .python-version .nox @@ -37,4 +43,4 @@ temp_*.* .LSOverride .vscode -.idea \ No newline at end of file +.idea diff --git a/CHANGELOG.md b/CHANGELOG.md index 57aaf27b3..c57986b50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,13 +3,17 @@ All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](http://semver.org/). ## Unreleased + +- Additional improvements to search space construction - changed HIP python bindings from pyhip-interface to the official hip-python +- Added Python 3.13 and experimental 3.14 support +- Dropped Python 3.8 and 3.9 support (due to incompatibility with newer scipy versions) ## [1.0.0] - 2024-04-04 - HIP backend to support tuning HIP kernels on AMD GPUs - Experimental features for mixed-precision and accuracy tuning - Experimental features for OpenACC tuning -- Major speedup due to new parser and using revamped python-constraint for searchspace building +- Major speedup due to new parser and using revamped python-constraint for search space construction - Implemented ability to use `PySMT` and `ATF` for searchspace building - Added Poetry for dependency and build management - Switched from `setup.py` and `setup.cfg` to `pyproject.toml` for centralized metadata, added relevant tests diff --git a/INSTALL.rst b/INSTALL.rst index 13df5f95c..8e938676f 100644 --- a/INSTALL.rst +++ b/INSTALL.rst @@ -20,7 +20,7 @@ Linux users could type the following to download and install Python 3 using Mini wget 
https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh bash Miniconda3-latest-Linux-x86_64.sh -You are of course also free to use your own Python installation, and the Kernel Tuner is developed to be fully compatible with Python 3.9 and newer. +You are of course also free to use your own Python installation, and the Kernel Tuner is developed to be fully compatible with Python 3.10 and newer. Installing Python Packages -------------------------- diff --git a/doc/requirements.txt b/doc/requirements.txt index 355caa7a6..e99971ffb 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,89 +1,92 @@ -alabaster==0.7.16 ; python_version >= "3.9" and python_version < "3.15" -asttokens==3.0.0 ; python_version >= "3.9" and python_version < "3.15" -attrs==25.1.0 ; python_version >= "3.9" and python_version < "3.15" -babel==2.17.0 ; python_version >= "3.9" and python_version < "3.15" -beautifulsoup4==4.13.3 ; python_version >= "3.9" and python_version < "3.15" -bleach[css]==6.2.0 ; python_version >= "3.9" and python_version < "3.15" -certifi==2025.1.31 ; python_version >= "3.9" and python_version < "3.15" -cffi==1.17.1 ; python_version >= "3.9" and python_version < "3.15" and implementation_name == "pypy" -charset-normalizer==3.4.1 ; python_version >= "3.9" and python_version < "3.15" -colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.15" and sys_platform == "win32" -decorator==5.2.1 ; python_version >= "3.9" and python_version < "3.15" -defusedxml==0.7.1 ; python_version >= "3.9" and python_version < "3.15" -docutils==0.20.1 ; python_version >= "3.9" and python_version < "3.15" -dom-toml==2.0.1 ; python_version >= "3.9" and python_version < "3.15" -domdf-python-tools==3.10.0 ; python_version >= "3.9" and python_version < "3.15" -exceptiongroup==1.2.2 ; python_version >= "3.9" and python_version < "3.11" -executing==2.2.0 ; python_version >= "3.9" and python_version < "3.15" -fastjsonschema==2.21.1 ; python_version >= "3.9" and 
python_version < "3.15" -idna==3.10 ; python_version >= "3.9" and python_version < "3.15" -imagesize==1.4.1 ; python_version >= "3.9" and python_version < "3.15" -importlib-metadata==8.6.1 ; python_version >= "3.9" and python_version < "3.10" -iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "3.15" -ipython==8.18.1 ; python_version >= "3.9" and python_version < "3.15" -jedi==0.19.2 ; python_version >= "3.9" and python_version < "3.15" -jinja2==3.1.6 ; python_version >= "3.9" and python_version < "3.15" -joblib==1.4.2 ; python_version >= "3.9" and python_version < "3.15" -jsonschema-specifications==2024.10.1 ; python_version >= "3.9" and python_version < "3.15" -jsonschema==4.23.0 ; python_version >= "3.9" and python_version < "3.15" -jupyter-client==8.6.3 ; python_version >= "3.9" and python_version < "3.15" -jupyter-core==5.7.2 ; python_version >= "3.9" and python_version < "3.15" -jupyterlab-pygments==0.3.0 ; python_version >= "3.9" and python_version < "3.15" -markupsafe==2.1.5 ; python_version >= "3.9" and python_version < "3.15" -matplotlib-inline==0.1.7 ; python_version >= "3.9" and python_version < "3.15" -mistune==3.1.2 ; python_version >= "3.9" and python_version < "3.15" -natsort==8.4.0 ; python_version >= "3.9" and python_version < "3.15" -nbclient==0.10.2 ; python_version >= "3.9" and python_version < "3.15" -nbconvert==7.16.6 ; python_version >= "3.9" and python_version < "3.15" -nbformat==5.10.4 ; python_version >= "3.9" and python_version < "3.15" -nbsphinx==0.9.7 ; python_version >= "3.9" and python_version < "3.15" -numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.15" -packaging==24.2 ; python_version >= "3.9" and python_version < "3.15" -pandas==2.2.3 ; python_version >= "3.9" and python_version < "3.15" -pandocfilters==1.5.1 ; python_version >= "3.9" and python_version < "3.15" -parso==0.8.4 ; python_version >= "3.9" and python_version < "3.15" -pexpect==4.9.0 ; python_version >= "3.9" and python_version < "3.15" and 
sys_platform != "win32" -platformdirs==4.3.6 ; python_version >= "3.9" and python_version < "3.15" -pluggy==1.5.0 ; python_version >= "3.9" and python_version < "3.15" -prompt-toolkit==3.0.50 ; python_version >= "3.9" and python_version < "3.15" -ptyprocess==0.7.0 ; python_version >= "3.9" and python_version < "3.15" and sys_platform != "win32" -pure-eval==0.2.3 ; python_version >= "3.9" and python_version < "3.15" -pycparser==2.22 ; python_version >= "3.9" and python_version < "3.15" and implementation_name == "pypy" -pygments==2.19.1 ; python_version >= "3.9" and python_version < "3.15" -pytest==8.3.5 ; python_version >= "3.9" and python_version < "3.15" -python-constraint2==2.1.0 ; python_version >= "3.9" and python_version < "3.15" -python-dateutil==2.9.0.post0 ; python_version >= "3.9" and python_version < "3.15" -pytz==2025.1 ; python_version >= "3.9" and python_version < "3.15" -pywin32==308 ; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.9" and python_version < "3.15" -pyzmq==26.2.1 ; python_version >= "3.9" and python_version < "3.15" -referencing==0.36.2 ; python_version >= "3.9" and python_version < "3.15" -requests==2.32.3 ; python_version >= "3.9" and python_version < "3.15" -rpds-py==0.23.1 ; python_version >= "3.9" and python_version < "3.15" -scikit-learn==1.6.1 ; python_version >= "3.9" and python_version < "3.15" -scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.15" -six==1.17.0 ; python_version >= "3.9" and python_version < "3.15" -snowballstemmer==2.2.0 ; python_version >= "3.9" and python_version < "3.15" -soupsieve==2.6 ; python_version >= "3.9" and python_version < "3.15" -sphinx-pyproject==0.3.0 ; python_version >= "3.9" and python_version < "3.15" -sphinx-rtd-theme==2.0.0 ; python_version >= "3.9" and python_version < "3.15" -sphinx==7.4.7 ; python_version >= "3.9" and python_version < "3.15" -sphinxcontrib-applehelp==2.0.0 ; python_version >= "3.9" and python_version < "3.15" 
-sphinxcontrib-devhelp==2.0.0 ; python_version >= "3.9" and python_version < "3.15" -sphinxcontrib-htmlhelp==2.1.0 ; python_version >= "3.9" and python_version < "3.15" -sphinxcontrib-jquery==4.1 ; python_version >= "3.9" and python_version < "3.15" -sphinxcontrib-jsmath==1.0.1 ; python_version >= "3.9" and python_version < "3.15" -sphinxcontrib-qthelp==2.0.0 ; python_version >= "3.9" and python_version < "3.15" -sphinxcontrib-serializinghtml==2.0.0 ; python_version >= "3.9" and python_version < "3.15" -stack-data==0.6.3 ; python_version >= "3.9" and python_version < "3.15" -threadpoolctl==3.5.0 ; python_version >= "3.9" and python_version < "3.15" -tinycss2==1.4.0 ; python_version >= "3.9" and python_version < "3.15" -tomli==2.2.1 ; python_version >= "3.9" and python_version < "3.15" -tornado==6.4.2 ; python_version >= "3.9" and python_version < "3.15" -traitlets==5.14.3 ; python_version >= "3.9" and python_version < "3.15" -typing-extensions==4.12.2 ; python_version >= "3.9" and python_version < "3.15" -tzdata==2025.1 ; python_version >= "3.9" and python_version < "3.15" -urllib3==2.3.0 ; python_version >= "3.9" and python_version < "3.15" -wcwidth==0.2.13 ; python_version >= "3.9" and python_version < "3.15" -webencodings==0.5.1 ; python_version >= "3.9" and python_version < "3.15" -xmltodict==0.14.2 ; python_version >= "3.9" and python_version < "3.15" -zipp==3.21.0 ; python_version >= "3.9" and python_version < "3.10" +--extra-index-url https://test.pypi.org/simple + +alabaster==0.7.16 ; python_version >= "3.10" and python_version < "4" +asttokens==3.0.0 ; python_version >= "3.10" and python_version < "4" +attrs==25.3.0 ; python_version >= "3.10" and python_version < "4" +babel==2.17.0 ; python_version >= "3.10" and python_version < "4" +beautifulsoup4==4.13.3 ; python_version >= "3.10" and python_version < "4" +bleach==6.2.0 ; python_version >= "3.10" and python_version < "4" +certifi==2025.1.31 ; python_version >= "3.10" and python_version < "4" 
+cffi==1.17.1 ; python_version >= "3.10" and python_version < "4" and implementation_name == "pypy" +charset-normalizer==3.4.1 ; python_version >= "3.10" and python_version < "4" +colorama==0.4.6 ; python_version >= "3.10" and python_version < "4" and sys_platform == "win32" +decorator==5.2.1 ; python_version >= "3.10" and python_version < "4" +defusedxml==0.7.1 ; python_version >= "3.10" and python_version < "4" +docutils==0.20.1 ; python_version >= "3.10" and python_version < "4" +dom-toml==2.0.1 ; python_version >= "3.10" and python_version < "4" +domdf-python-tools==3.10.0 ; python_version >= "3.10" and python_version < "4" +exceptiongroup==1.2.2 ; python_version == "3.10" +executing==2.2.0 ; python_version >= "3.10" and python_version < "4" +fastjsonschema==2.21.1 ; python_version >= "3.10" and python_version < "4" +hip-python==6.3.3.540.31 ; python_version >= "3.10" and python_version < "4" +idna==3.10 ; python_version >= "3.10" and python_version < "4" +imagesize==1.4.1 ; python_version >= "3.10" and python_version < "4" +iniconfig==2.1.0 ; python_version >= "3.10" and python_version < "4" +ipython-pygments-lexers==1.1.1 ; python_version >= "3.11" and python_version < "4" +ipython==8.34.0 ; python_version == "3.10" +ipython==9.0.2 ; python_version >= "3.11" and python_version < "4" +jedi==0.19.2 ; python_version >= "3.10" and python_version < "4" +jinja2==3.1.6 ; python_version >= "3.10" and python_version < "4" +joblib==1.4.2 ; python_version >= "3.10" and python_version < "4" +jsonschema-specifications==2024.10.1 ; python_version >= "3.10" and python_version < "4" +jsonschema==4.23.0 ; python_version >= "3.10" and python_version < "4" +jupyter-client==8.6.3 ; python_version >= "3.10" and python_version < "4" +jupyter-core==5.7.2 ; python_version >= "3.10" and python_version < "4" +jupyterlab-pygments==0.3.0 ; python_version >= "3.10" and python_version < "4" +markupsafe==2.1.5 ; python_version >= "3.10" and python_version < "4" +matplotlib-inline==0.1.7 ; 
python_version >= "3.10" and python_version < "4" +mistune==3.1.3 ; python_version >= "3.10" and python_version < "4" +natsort==8.4.0 ; python_version >= "3.10" and python_version < "4" +nbclient==0.10.2 ; python_version >= "3.10" and python_version < "4" +nbconvert==7.16.6 ; python_version >= "3.10" and python_version < "4" +nbformat==5.10.4 ; python_version >= "3.10" and python_version < "4" +nbsphinx==0.9.7 ; python_version >= "3.10" and python_version < "4" +numpy==1.26.4 ; python_version >= "3.10" and python_version < "4" +packaging==24.2 ; python_version >= "3.10" and python_version < "4" +pandas==2.2.3 ; python_version >= "3.10" and python_version < "4" +pandocfilters==1.5.1 ; python_version >= "3.10" and python_version < "4" +parso==0.8.4 ; python_version >= "3.10" and python_version < "4" +pexpect==4.9.0 ; python_version >= "3.10" and python_version < "4" and sys_platform != "win32" and sys_platform != "emscripten" +platformdirs==4.3.7 ; python_version >= "3.10" and python_version < "4" +pluggy==1.5.0 ; python_version >= "3.10" and python_version < "4" +prompt-toolkit==3.0.50 ; python_version >= "3.10" and python_version < "4" +ptyprocess==0.7.0 ; python_version >= "3.10" and python_version < "4" and (os_name != "nt" or sys_platform != "win32" and sys_platform != "emscripten") +pure-eval==0.2.3 ; python_version >= "3.10" and python_version < "4" +pycparser==2.22 ; python_version >= "3.10" and python_version < "4" and implementation_name == "pypy" +pygments==2.19.1 ; python_version >= "3.10" and python_version < "4" +pytest==8.3.5 ; python_version >= "3.10" and python_version < "4" +python-constraint2==2.2.2 ; python_version >= "3.10" and python_version < "4" +python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version < "4" +pytz==2025.2 ; python_version >= "3.10" and python_version < "4" +pywin32==310 ; python_version >= "3.10" and python_version < "4" and sys_platform == "win32" and platform_python_implementation != "PyPy" +pyzmq==26.3.0 ; 
python_version >= "3.10" and python_version < "4" +referencing==0.36.2 ; python_version >= "3.10" and python_version < "4" +requests==2.32.3 ; python_version >= "3.10" and python_version < "4" +rpds-py==0.24.0 ; python_version >= "3.10" and python_version < "4" +scikit-learn==1.6.1 ; python_version >= "3.10" and python_version < "4" +scipy==1.15.2 ; python_version >= "3.10" and python_version < "4" +six==1.17.0 ; python_version >= "3.10" and python_version < "4" +snowballstemmer==2.2.0 ; python_version >= "3.10" and python_version < "4" +soupsieve==2.6 ; python_version >= "3.10" and python_version < "4" +sphinx-pyproject==0.3.0 ; python_version >= "3.10" and python_version < "4" +sphinx-rtd-theme==2.0.0 ; python_version >= "3.10" and python_version < "4" +sphinx==7.4.7 ; python_version >= "3.10" and python_version < "4" +sphinxcontrib-applehelp==2.0.0 ; python_version >= "3.10" and python_version < "4" +sphinxcontrib-devhelp==2.0.0 ; python_version >= "3.10" and python_version < "4" +sphinxcontrib-htmlhelp==2.1.0 ; python_version >= "3.10" and python_version < "4" +sphinxcontrib-jquery==4.1 ; python_version >= "3.10" and python_version < "4" +sphinxcontrib-jsmath==1.0.1 ; python_version >= "3.10" and python_version < "4" +sphinxcontrib-qthelp==2.0.0 ; python_version >= "3.10" and python_version < "4" +sphinxcontrib-serializinghtml==2.0.0 ; python_version >= "3.10" and python_version < "4" +stack-data==0.6.3 ; python_version >= "3.10" and python_version < "4" +threadpoolctl==3.6.0 ; python_version >= "3.10" and python_version < "4" +tinycss2==1.4.0 ; python_version >= "3.10" and python_version < "4" +tomli==2.2.1 ; python_version >= "3.10" and python_version < "4" +tornado==6.4.2 ; python_version >= "3.10" and python_version < "4" +traitlets==5.14.3 ; python_version >= "3.10" and python_version < "4" +typing-extensions==4.13.1 ; python_version >= "3.10" and python_version < "4" +tzdata==2025.2 ; python_version >= "3.10" and python_version < "4" +urllib3==2.3.0 ; 
python_version >= "3.10" and python_version < "4" +wcwidth==0.2.13 ; python_version >= "3.10" and python_version < "4" +webencodings==0.5.1 ; python_version >= "3.10" and python_version < "4" +xmltodict==0.14.2 ; python_version >= "3.10" and python_version < "4" diff --git a/doc/requirements_test.txt b/doc/requirements_test.txt index f4f62912c..5a6db0db7 100644 --- a/doc/requirements_test.txt +++ b/doc/requirements_test.txt @@ -1,116 +1,210 @@ -argcomplete==3.6.0 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:2e4e42ec0ba2fff54b0d244d0b1623e86057673e57bafe72dda59c64bd5dee8b \ - --hash=sha256:4e3e4e10beb20e06444dbac0ac8dda650cb6349caeefe980208d3c548708bedd -attrs==25.1.0 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:1c97078a80c814273a76b2a298a932eb681c87415c11dee0a6921de7f1b02c3e \ - --hash=sha256:c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a -build==1.2.2.post1 ; python_version >= "3.9" and python_version < "3.15" \ +--extra-index-url https://test.pypi.org/simple + +argcomplete==3.6.2 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:65b3133a29ad53fb42c48cf5114752c7ab66c1c38544fdf6460f450c09b42591 \ + --hash=sha256:d0519b1bc867f5f4f4713c41ad0aba73a4a5f007449716b16f385f2166dc6adf +asttokens==3.0.0 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7 \ + --hash=sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2 +attrs==25.3.0 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3 \ + --hash=sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b +build==1.2.2.post1 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:1d61c0887fa860c01971625baae8bdd338e517b836a2f70dd1f7aa3a6b2fc5b5 \ --hash=sha256:b36993e92ca9375a219c99e606a122ff365a760a2d4bba0caa09bd5278b608b7 
-colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.15" and (sys_platform == "win32" or os_name == "nt") \ +colorama==0.4.6 ; python_version >= "3.10" and python_version < "4" and (sys_platform == "win32" or os_name == "nt") \ --hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \ --hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6 -colorlog==6.9.0 ; python_version >= "3.9" and python_version < "3.15" \ +colorlog==6.9.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ --hash=sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2 -coverage[toml]==7.6.12 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:00b2086892cf06c7c2d74983c9595dc511acca00665480b3ddff749ec4fb2a95 \ - --hash=sha256:0533adc29adf6a69c1baa88c3d7dbcaadcffa21afbed3ca7a225a440e4744bf9 \ - --hash=sha256:06097c7abfa611c91edb9e6920264e5be1d6ceb374efb4986f38b09eed4cb2fe \ - --hash=sha256:07e92ae5a289a4bc4c0aae710c0948d3c7892e20fd3588224ebe242039573bf0 \ - --hash=sha256:0a9d8be07fb0832636a0f72b80d2a652fe665e80e720301fb22b191c3434d924 \ - --hash=sha256:0e549f54ac5f301e8e04c569dfdb907f7be71b06b88b5063ce9d6953d2d58574 \ - --hash=sha256:0ef01d70198431719af0b1f5dcbefc557d44a190e749004042927b2a3fed0702 \ - --hash=sha256:0f16f44025c06792e0fb09571ae454bcc7a3ec75eeb3c36b025eccf501b1a4c3 \ - --hash=sha256:14d47376a4f445e9743f6c83291e60adb1b127607a3618e3185bbc8091f0467b \ - --hash=sha256:1a936309a65cc5ca80fa9f20a442ff9e2d06927ec9a4f54bcba9c14c066323f2 \ - --hash=sha256:1ceeb90c3eda1f2d8c4c578c14167dbd8c674ecd7d38e45647543f19839dd6ea \ - --hash=sha256:1f7ffa05da41754e20512202c866d0ebfc440bba3b0ed15133070e20bf5aeb5f \ - --hash=sha256:200e10beb6ddd7c3ded322a4186313d5ca9e63e33d8fab4faa67ef46d3460af3 \ - --hash=sha256:220fa6c0ad7d9caef57f2c8771918324563ef0d8272c94974717c3909664e674 \ - 
--hash=sha256:2251fabcfee0a55a8578a9d29cecfee5f2de02f11530e7d5c5a05859aa85aee9 \ - --hash=sha256:2458f275944db8129f95d91aee32c828a408481ecde3b30af31d552c2ce284a0 \ - --hash=sha256:299cf973a7abff87a30609879c10df0b3bfc33d021e1adabc29138a48888841e \ - --hash=sha256:2b996819ced9f7dbb812c701485d58f261bef08f9b85304d41219b1496b591ef \ - --hash=sha256:3688b99604a24492bcfe1c106278c45586eb819bf66a654d8a9a1433022fb2eb \ - --hash=sha256:3a1e465f398c713f1b212400b4e79a09829cd42aebd360362cd89c5bdc44eb87 \ - --hash=sha256:488c27b3db0ebee97a830e6b5a3ea930c4a6e2c07f27a5e67e1b3532e76b9ef1 \ - --hash=sha256:48cfc4641d95d34766ad41d9573cc0f22a48aa88d22657a1fe01dca0dbae4de2 \ - --hash=sha256:4b467a8c56974bf06e543e69ad803c6865249d7a5ccf6980457ed2bc50312703 \ - --hash=sha256:53c56358d470fa507a2b6e67a68fd002364d23c83741dbc4c2e0680d80ca227e \ - --hash=sha256:5d1095bbee1851269f79fd8e0c9b5544e4c00c0c24965e66d8cba2eb5bb535fd \ - --hash=sha256:641dfe0ab73deb7069fb972d4d9725bf11c239c309ce694dd50b1473c0f641c3 \ - --hash=sha256:64cbb1a3027c79ca6310bf101014614f6e6e18c226474606cf725238cf5bc2d4 \ - --hash=sha256:66fe626fd7aa5982cdebad23e49e78ef7dbb3e3c2a5960a2b53632f1f703ea45 \ - --hash=sha256:676f92141e3c5492d2a1596d52287d0d963df21bf5e55c8b03075a60e1ddf8aa \ - --hash=sha256:69e62c5034291c845fc4df7f8155e8544178b6c774f97a99e2734b05eb5bed31 \ - --hash=sha256:704c8c8c6ce6569286ae9622e534b4f5b9759b6f2cd643f1c1a61f666d534fe8 \ - --hash=sha256:78f5243bb6b1060aed6213d5107744c19f9571ec76d54c99cc15938eb69e0e86 \ - --hash=sha256:79cac3390bfa9836bb795be377395f28410811c9066bc4eefd8015258a7578c6 \ - --hash=sha256:7ae6eabf519bc7871ce117fb18bf14e0e343eeb96c377667e3e5dd12095e0288 \ - --hash=sha256:7e39e845c4d764208e7b8f6a21c541ade741e2c41afabdfa1caa28687a3c98cf \ - --hash=sha256:8161d9fbc7e9fe2326de89cd0abb9f3599bccc1287db0aba285cb68d204ce929 \ - --hash=sha256:8bec2ac5da793c2685ce5319ca9bcf4eee683b8a1679051f8e6ec04c4f2fd7dc \ - --hash=sha256:959244a17184515f8c52dcb65fb662808767c0bd233c1d8a166e7cf74c9ea985 \ - 
--hash=sha256:9b148068e881faa26d878ff63e79650e208e95cf1c22bd3f77c3ca7b1d9821a3 \ - --hash=sha256:aa6f302a3a0b5f240ee201297fff0bbfe2fa0d415a94aeb257d8b461032389bd \ - --hash=sha256:ace9048de91293e467b44bce0f0381345078389814ff6e18dbac8fdbf896360e \ - --hash=sha256:ad7525bf0241e5502168ae9c643a2f6c219fa0a283001cee4cf23a9b7da75879 \ - --hash=sha256:b01a840ecc25dce235ae4c1b6a0daefb2a203dba0e6e980637ee9c2f6ee0df57 \ - --hash=sha256:b076e625396e787448d27a411aefff867db2bffac8ed04e8f7056b07024eed5a \ - --hash=sha256:b172f8e030e8ef247b3104902cc671e20df80163b60a203653150d2fc204d1ad \ - --hash=sha256:b1f097878d74fe51e1ddd1be62d8e3682748875b461232cf4b52ddc6e6db0bba \ - --hash=sha256:b95574d06aa9d2bd6e5cc35a5bbe35696342c96760b69dc4287dbd5abd4ad51d \ - --hash=sha256:bda1c5f347550c359f841d6614fb8ca42ae5cb0b74d39f8a1e204815ebe25750 \ - --hash=sha256:cec6b9ce3bd2b7853d4a4563801292bfee40b030c05a3d29555fd2a8ee9bd68c \ - --hash=sha256:d1a987778b9c71da2fc8948e6f2656da6ef68f59298b7e9786849634c35d2c3c \ - --hash=sha256:d74c08e9aaef995f8c4ef6d202dbd219c318450fe2a76da624f2ebb9c8ec5d9f \ - --hash=sha256:e18aafdfb3e9ec0d261c942d35bd7c28d031c5855dadb491d2723ba54f4c3015 \ - --hash=sha256:e216c5c45f89ef8971373fd1c5d8d1164b81f7f5f06bbf23c37e7908d19e8558 \ - --hash=sha256:e695df2c58ce526eeab11a2e915448d3eb76f75dffe338ea613c1201b33bab2f \ - --hash=sha256:e7575ab65ca8399c8c4f9a7d61bbd2d204c8b8e447aab9d355682205c9dd948d \ - --hash=sha256:e995b3b76ccedc27fe4f477b349b7d64597e53a43fc2961db9d3fbace085d69d \ - --hash=sha256:ea31689f05043d520113e0552f039603c4dd71fa4c287b64cb3606140c66f425 \ - --hash=sha256:eb5507795caabd9b2ae3f1adc95f67b1104971c22c624bb354232d65c4fc90b3 \ - --hash=sha256:eb8668cfbc279a536c633137deeb9435d2962caec279c3f8cf8b91fff6ff8953 \ - --hash=sha256:ecea0c38c9079570163d663c0433a9af4094a60aafdca491c6a3d248c7432827 \ - --hash=sha256:f25d8b92a4e31ff1bd873654ec367ae811b3a943583e05432ea29264782dc32c \ - --hash=sha256:f60a297c3987c6c02ffb29effc70eadcbb412fe76947d394a1091a3615948e2f \ - 
--hash=sha256:f973643ef532d4f9be71dd88cf7588936685fdb576d93a79fe9f65bc337d9d73 -distlib==0.3.9 ; python_version >= "3.9" and python_version < "3.15" \ +coverage==7.8.0 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:042e7841a26498fff7a37d6fda770d17519982f5b7d8bf5278d140b67b61095f \ + --hash=sha256:04bfec25a8ef1c5f41f5e7e5c842f6b615599ca8ba8391ec33a9290d9d2db3a3 \ + --hash=sha256:0915742f4c82208ebf47a2b154a5334155ed9ef9fe6190674b8a46c2fb89cb05 \ + --hash=sha256:18c5ae6d061ad5b3e7eef4363fb27a0576012a7447af48be6c75b88494c6cf25 \ + --hash=sha256:2931f66991175369859b5fd58529cd4b73582461877ecfd859b6549869287ffe \ + --hash=sha256:2e4b6b87bb0c846a9315e3ab4be2d52fac905100565f4b92f02c445c8799e257 \ + --hash=sha256:3043ba1c88b2139126fc72cb48574b90e2e0546d4c78b5299317f61b7f718b78 \ + --hash=sha256:379fe315e206b14e21db5240f89dc0774bdd3e25c3c58c2c733c99eca96f1ada \ + --hash=sha256:42421e04069fb2cbcbca5a696c4050b84a43b05392679d4068acbe65449b5c64 \ + --hash=sha256:4dfd9a93db9e78666d178d4f08a5408aa3f2474ad4d0e0378ed5f2ef71640cb6 \ + --hash=sha256:52a523153c568d2c0ef8826f6cc23031dc86cffb8c6aeab92c4ff776e7951b28 \ + --hash=sha256:554fec1199d93ab30adaa751db68acec2b41c5602ac944bb19187cb9a41a8067 \ + --hash=sha256:581a40c7b94921fffd6457ffe532259813fc68eb2bdda60fa8cc343414ce3733 \ + --hash=sha256:5a26c0c795c3e0b63ec7da6efded5f0bc856d7c0b24b2ac84b4d1d7bc578d676 \ + --hash=sha256:5a570cd9bd20b85d1a0d7b009aaf6c110b52b5755c17be6962f8ccd65d1dbd23 \ + --hash=sha256:5aaeb00761f985007b38cf463b1d160a14a22c34eb3f6a39d9ad6fc27cb73008 \ + --hash=sha256:5ac46d0c2dd5820ce93943a501ac5f6548ea81594777ca585bf002aa8854cacd \ + --hash=sha256:5c8a5c139aae4c35cbd7cadca1df02ea8cf28a911534fc1b0456acb0b14234f3 \ + --hash=sha256:6b8af63b9afa1031c0ef05b217faa598f3069148eeee6bb24b79da9012423b82 \ + --hash=sha256:769773614e676f9d8e8a0980dd7740f09a6ea386d0f383db6821df07d0f08545 \ + --hash=sha256:771eb7587a0563ca5bb6f622b9ed7f9d07bd08900f7589b4febff05f469bea00 \ + 
--hash=sha256:77af0f6447a582fdc7de5e06fa3757a3ef87769fbb0fdbdeba78c23049140a47 \ + --hash=sha256:7a3d62b3b03b4b6fd41a085f3574874cf946cb4604d2b4d3e8dca8cd570ca501 \ + --hash=sha256:821f7bcbaa84318287115d54becb1915eece6918136c6f91045bb84e2f88739d \ + --hash=sha256:89b1f4af0d4afe495cd4787a68e00f30f1d15939f550e869de90a86efa7e0814 \ + --hash=sha256:8a1d96e780bdb2d0cbb297325711701f7c0b6f89199a57f2049e90064c29f6bd \ + --hash=sha256:8a40fcf208e021eb14b0fac6bdb045c0e0cab53105f93ba0d03fd934c956143a \ + --hash=sha256:8f99eb72bf27cbb167b636eb1726f590c00e1ad375002230607a844d9e9a2318 \ + --hash=sha256:90e7fbc6216ecaffa5a880cdc9c77b7418c1dcb166166b78dbc630d07f278cc3 \ + --hash=sha256:94ec0be97723ae72d63d3aa41961a0b9a6f5a53ff599813c324548d18e3b9e8c \ + --hash=sha256:95aa6ae391a22bbbce1b77ddac846c98c5473de0372ba5c463480043a07bff42 \ + --hash=sha256:96121edfa4c2dfdda409877ea8608dd01de816a4dc4a0523356067b305e4e17a \ + --hash=sha256:a1f406a8e0995d654b2ad87c62caf6befa767885301f3b8f6f73e6f3c31ec3a6 \ + --hash=sha256:a321c61477ff8ee705b8a5fed370b5710c56b3a52d17b983d9215861e37b642a \ + --hash=sha256:a5761c70c017c1b0d21b0815a920ffb94a670c8d5d409d9b38857874c21f70d7 \ + --hash=sha256:a9abbccd778d98e9c7e85038e35e91e67f5b520776781d9a1e2ee9d400869487 \ + --hash=sha256:ad80e6b4a0c3cb6f10f29ae4c60e991f424e6b14219d46f1e7d442b938ee68a4 \ + --hash=sha256:b44674870709017e4b4036e3d0d6c17f06a0e6d4436422e0ad29b882c40697d2 \ + --hash=sha256:b571bf5341ba8c6bc02e0baeaf3b061ab993bf372d982ae509807e7f112554e9 \ + --hash=sha256:b8194fb8e50d556d5849753de991d390c5a1edeeba50f68e3a9253fbd8bf8ccd \ + --hash=sha256:b87eb6fc9e1bb8f98892a2458781348fa37e6925f35bb6ceb9d4afd54ba36c73 \ + --hash=sha256:bbb5cc845a0292e0c520656d19d7ce40e18d0e19b22cb3e0409135a575bf79fc \ + --hash=sha256:be945402e03de47ba1872cd5236395e0f4ad635526185a930735f66710e1bd3f \ + --hash=sha256:bf13d564d310c156d1c8e53877baf2993fb3073b2fc9f69790ca6a732eb4bfea \ + --hash=sha256:cf60dd2696b457b710dd40bf17ad269d5f5457b96442f7f85722bdb16fa6c899 \ + 
--hash=sha256:d1ba00ae33be84066cfbe7361d4e04dec78445b2b88bdb734d0d1cbab916025a \ + --hash=sha256:d39fc4817fd67b3915256af5dda75fd4ee10621a3d484524487e33416c6f3543 \ + --hash=sha256:d766a4f0e5aa1ba056ec3496243150698dc0481902e2b8559314368717be82b1 \ + --hash=sha256:dbf364b4c5e7bae9250528167dfe40219b62e2d573c854d74be213e1e52069f7 \ + --hash=sha256:dd19608788b50eed889e13a5d71d832edc34fc9dfce606f66e8f9f917eef910d \ + --hash=sha256:e013b07ba1c748dacc2a80e69a46286ff145935f260eb8c72df7185bf048f502 \ + --hash=sha256:e5d2b9be5b0693cf21eb4ce0ec8d211efb43966f6657807f6859aab3814f946b \ + --hash=sha256:e5ff52d790c7e1628241ffbcaeb33e07d14b007b6eb00a19320c7b8a7024c040 \ + --hash=sha256:e75a2ad7b647fd8046d58c3132d7eaf31b12d8a53c0e4b21fa9c4d23d6ee6d3c \ + --hash=sha256:e7ac22a0bb2c7c49f441f7a6d46c9c80d96e56f5a8bc6972529ed43c8b694e27 \ + --hash=sha256:ed2144b8a78f9d94d9515963ed273d620e07846acd5d4b0a642d4849e8d91a0c \ + --hash=sha256:f017a61399f13aa6d1039f75cd467be388d157cd81f1a119b9d9a68ba6f2830d \ + --hash=sha256:f1d8a2a57b47142b10374902777e798784abf400a004b14f1b0b9eaf1e528ba4 \ + --hash=sha256:f2d32f95922927186c6dbc8bc60df0d186b6edb828d299ab10898ef3f40052fe \ + --hash=sha256:f319bae0321bc838e205bf9e5bc28f0a3165f30c203b610f17ab5552cff90323 \ + --hash=sha256:f3c38e4e5ccbdc9198aecc766cedbb134b2d89bf64533973678dfcf07effd883 \ + --hash=sha256:f9983d01d7705b2d1f7a95e10bbe4091fabc03a46881a256c2787637b087003f \ + --hash=sha256:fa260de59dfb143af06dcf30c2be0b200bed2a73737a8a59248fcb9fa601ef0f +decorator==5.2.1 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360 \ + --hash=sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a +distlib==0.3.9 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 -exceptiongroup==1.2.2 ; python_version >= 
"3.9" and python_version < "3.11" \ +exceptiongroup==1.2.2 ; python_version == "3.10" \ --hash=sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b \ --hash=sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc -filelock==3.17.0 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:533dc2f7ba78dc2f0f531fc6c4940addf7b70a481e269a5a3b93be94ffbe8338 \ - --hash=sha256:ee4e77401ef576ebb38cd7f13b9b28893194acc20a8e68e18730ba9c0e54660e -importlib-metadata==8.6.1 ; python_version >= "3.9" and python_full_version < "3.10.2" \ +executing==2.2.0 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa \ + --hash=sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755 +filelock==3.18.0 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2 \ + --hash=sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de +hip-python==6.3.3.540.31 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:254aba3a63a5f8910606539e59de710845515e0c2fb2bdcbbc2dc4c18754dfbc \ + --hash=sha256:5c0da34b3872d31f8bf2f857bcdedfb84f9740c3fd3e1ace026fde65219815d8 \ + --hash=sha256:6ce1b96f63f3427b4433428505c3c2d7b3959a253751decfe1ccdc05407d8c0d \ + --hash=sha256:77b54fa241ddd565ad4d39f927633c75a69708762987f77392657692b247b24d \ + --hash=sha256:7f9dbfbda4baf7e11a3eb3ddf728622880c79f3c525c68161566c9e471da4f4b \ + --hash=sha256:994ef7f5b04be6c9544ef943eeac7cc282fc9583f621bfd615d18b7a812ef9ce +importlib-metadata==8.6.1 ; python_version >= "3.10" and python_full_version < "3.10.2" \ --hash=sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e \ --hash=sha256:310b41d755445d74569f993ccfc22838295d9fe005425094fad953d7f15c8580 -iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "3.15" \ - 
--hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \ - --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 -joblib==1.4.2 ; python_version >= "3.9" and python_version < "3.15" \ +iniconfig==2.1.0 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7 \ + --hash=sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760 +ipython-pygments-lexers==1.1.1 ; python_version >= "3.11" and python_version < "4" \ + --hash=sha256:09c0138009e56b6854f9535736f4171d855c8c08a563a0dcd8022f78355c7e81 \ + --hash=sha256:a9462224a505ade19a605f71f8fa63c2048833ce50abc86768a0d81d876dc81c +ipython==8.34.0 ; python_version == "3.10" \ + --hash=sha256:0419883fa46e0baa182c5d50ebb8d6b49df1889fdb70750ad6d8cfe678eda6e3 \ + --hash=sha256:c31d658e754673ecc6514583e7dda8069e47136eb62458816b7d1e6625948b5a +ipython==9.0.2 ; python_version >= "3.11" and python_version < "4" \ + --hash=sha256:143ef3ea6fb1e1bffb4c74b114051de653ffb7737a3f7ab1670e657ca6ae8c44 \ + --hash=sha256:ec7b479e3e5656bf4f58c652c120494df1820f4f28f522fb7ca09e213c2aab52 +jedi==0.19.2 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0 \ + --hash=sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9 +joblib==1.4.2 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6 \ --hash=sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e -jsonschema-specifications==2024.10.1 ; python_version >= "3.9" and python_version < "3.15" \ +jsonschema-specifications==2024.10.1 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:0f38b83639958ce1152d02a7f062902c41c8fd20d558b0c34344292d417ae272 \ --hash=sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf -jsonschema==4.23.0 ; 
python_version >= "3.9" and python_version < "3.15" \ +jsonschema==4.23.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4 \ --hash=sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566 -mock==5.2.0 ; python_version >= "3.9" and python_version < "3.15" \ +markupsafe==2.1.5 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf \ + --hash=sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff \ + --hash=sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f \ + --hash=sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3 \ + --hash=sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532 \ + --hash=sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f \ + --hash=sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617 \ + --hash=sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df \ + --hash=sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4 \ + --hash=sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906 \ + --hash=sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f \ + --hash=sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4 \ + --hash=sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8 \ + --hash=sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371 \ + --hash=sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2 \ + --hash=sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465 \ + --hash=sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52 \ + --hash=sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6 \ + 
--hash=sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169 \ + --hash=sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad \ + --hash=sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2 \ + --hash=sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0 \ + --hash=sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029 \ + --hash=sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f \ + --hash=sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a \ + --hash=sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced \ + --hash=sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5 \ + --hash=sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c \ + --hash=sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf \ + --hash=sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9 \ + --hash=sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb \ + --hash=sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad \ + --hash=sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3 \ + --hash=sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1 \ + --hash=sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46 \ + --hash=sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc \ + --hash=sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a \ + --hash=sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee \ + --hash=sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900 \ + --hash=sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5 \ + --hash=sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea \ + --hash=sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f \ + 
--hash=sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5 \ + --hash=sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e \ + --hash=sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a \ + --hash=sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f \ + --hash=sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50 \ + --hash=sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a \ + --hash=sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b \ + --hash=sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4 \ + --hash=sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff \ + --hash=sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2 \ + --hash=sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46 \ + --hash=sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b \ + --hash=sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf \ + --hash=sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5 \ + --hash=sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5 \ + --hash=sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab \ + --hash=sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd \ + --hash=sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68 +matplotlib-inline==0.1.7 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90 \ + --hash=sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca +mock==5.2.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:4e460e818629b4b173f32d08bf30d3af8123afbb8e04bb5707a1fd4799e503f0 \ --hash=sha256:7ba87f72ca0e915175596069dbbcc7c75af7b5e9b9bc107ad6349ede0819982f -nox-poetry==1.2.0 ; python_version >= 
"3.9" and python_version < "3.15" \ +nox-poetry==1.2.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:2531a404e3a21eb73fc1a587a548506a8e2c4c1e6e7ef0c1d0d8d6453b7e5d26 \ --hash=sha256:266eea7a0ab3cad7f4121ecc05b76945036db3b67e6e347557f05010a18e2682 -nox==2024.10.9 ; python_version >= "3.9" and python_version < "3.15" \ +nox==2024.10.9 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 -numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.15" \ +numpy==1.26.4 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b \ --hash=sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818 \ --hash=sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20 \ @@ -147,10 +241,10 @@ numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.15" \ --hash=sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef \ --hash=sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3 \ --hash=sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f -packaging==24.2 ; python_version >= "3.9" and python_version < "3.15" \ +packaging==24.2 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f -pandas==2.2.3 ; python_version >= "3.9" and python_version < "3.15" \ +pandas==2.2.3 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a \ --hash=sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d \ --hash=sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5 \ @@ -193,158 +287,187 @@ 
pandas==2.2.3 ; python_version >= "3.9" and python_version < "3.15" \ --hash=sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015 \ --hash=sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24 \ --hash=sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319 -pep440==0.1.2 ; python_version >= "3.9" and python_version < "3.15" \ +parso==0.8.4 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18 \ + --hash=sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d +pep440==0.1.2 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:36d6ad73f2b5d07769294cafe183500ac89d848c922a3d3f521b968481880d51 \ --hash=sha256:58b37246cc2b13fee1ca2a3c092cb3704d21ecf621a5bdbb168e44e697f6d04d -platformdirs==4.3.6 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ - --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb -pluggy==1.5.0 ; python_version >= "3.9" and python_version < "3.15" \ +pexpect==4.9.0 ; python_version >= "3.10" and python_version < "4" and sys_platform != "win32" and sys_platform != "emscripten" \ + --hash=sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523 \ + --hash=sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f +platformdirs==4.3.7 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:a03875334331946f13c549dbd8f4bac7a13a50a895a0eb1e8c6a8ace80d40a94 \ + --hash=sha256:eb437d586b6a0986388f0d6f74aa0cde27b48d0e3d66843640bfb6bdcdb6e351 +pluggy==1.5.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \ --hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 -pyproject-hooks==1.2.0 ; python_version >= "3.9" and python_version < "3.15" \ 
+prompt-toolkit==3.0.50 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:544748f3860a2623ca5cd6d2795e7a14f3d0e1c3c9728359013f79877fc89bab \ + --hash=sha256:9b6427eb19e479d98acff65196a307c555eb567989e6d88ebbb1b509d9779198 +ptyprocess==0.7.0 ; python_version >= "3.10" and python_version < "4" and (os_name != "nt" or sys_platform != "win32" and sys_platform != "emscripten") \ + --hash=sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35 \ + --hash=sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220 +pure-eval==0.2.3 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0 \ + --hash=sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42 +pygments==2.19.1 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f \ + --hash=sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c +pyproject-hooks==1.2.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:1e859bd5c40fae9448642dd871adf459e5e2084186e8d2c2a79a824c970da1f8 \ --hash=sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913 -pytest-cov==5.0.0 ; python_version >= "3.9" and python_version < "3.15" \ +pytest-cov==5.0.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652 \ --hash=sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857 -pytest-timeout==2.3.1 ; python_version >= "3.9" and python_version < "3.15" \ +pytest-timeout==2.3.1 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:12397729125c6ecbdaca01035b9e5239d4db97352320af155b3f5de1ba5165d9 \ --hash=sha256:68188cb703edfc6a18fad98dc25a3c61e9f24d644b0b70f33af545219fc7813e -pytest==8.3.5 ; python_version >= "3.9" and python_version < "3.15" \ +pytest==8.3.5 ; python_version 
>= "3.10" and python_version < "4" \ --hash=sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820 \ --hash=sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845 -python-constraint2==2.1.0 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:02f46e4a7e8a46048604870287f1c55312eea47c2c15dd58b51057cb7d057bdc \ - --hash=sha256:0e5ece0b4e85ed680af6b9db33ef3497a6f9499b8957cd830cd139f17ac29aef \ - --hash=sha256:0f3a09c1947e6a90b9558cd1651e86dbe10f698aad56247596f2b856307707f0 \ - --hash=sha256:1c650d717c2585fd8b2247f680ca1dcc6ea970cc5644c1d847f97eacb9f7dce2 \ - --hash=sha256:38e4dbb2522ca2295873a57f6e0fddbb0856a780c87edd79b4074fd78790fed3 \ - --hash=sha256:441f6a06e6c88c5fbe724b834c820d959ba7542037139153d1466c7be00c7cc0 \ - --hash=sha256:6b8f82be66242fc5587011360b07c39e6e71e5d1c8f26a107dd2b04ab7854fcc \ - --hash=sha256:8086a21724048746e68ab721cb4a216db15f86bb700d557af0ac60f2087d4d4e \ - --hash=sha256:abea9ae443bf33fb396a6fb597b713e110f2abd9ecf1a656cd81f53da6751b79 \ - --hash=sha256:ace17786565250de48b8d18da555feb31f5fb3521b2bd65e9871459e1d179600 \ - --hash=sha256:b2385c99a9fe67ae26085a5a048c1d206cf0bd74acf0cd036227afa2a90fa4fd \ - --hash=sha256:e29bed90abe1240bf24794e73e4d8fa3e50b6aa9226d915b1902cdd03375c28b \ - --hash=sha256:ee3d33ca5694724a17bb596b93ff8687c70b4c07945e40a9007250e282e7ab28 \ - --hash=sha256:f28d07eae04d83d454f0e6ba2da0678786a21f2d405998a3eec960b56d809692 \ - --hash=sha256:fbb6ab033a7a4250bce11ca12fdf8958c6c42853e933cf585dbd265e0967dd93 \ - --hash=sha256:fc3cffd0f16cb9b34d2e95bd6d27425dd24044073760477a1341e835fc9c45f4 -python-dateutil==2.9.0.post0 ; python_version >= "3.9" and python_version < "3.15" \ +python-constraint2==2.2.2 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:02dcdf6d6f2d403b6304dddb242ef1b3db791600c7b8f8cd895dc3f87509bc6e \ + --hash=sha256:0951ff7ee0d549037ed078ecf828f33003730531a7231f9773c3674553362efa \ + 
--hash=sha256:21909f3e0dc12448eb4a317f109fb01bc766fb8db20cebc84493ed63f3db8670 \ + --hash=sha256:26af50fda2f0eecea191b9ec5203ededdeda4e16bc5987d0362997dbac01cf9a \ + --hash=sha256:26d5360d7b8563c2b2e25ad11398a5ed5fff346ad3c8be4ee9869fb52c4f921b \ + --hash=sha256:339ee2b5efc8ba19d5a789e6fea848c1d053bea52a7064bfff2c1414a8de46f2 \ + --hash=sha256:5fd97d5b23cb5cc1b18e223745d16d4dcaccdc6104aeb537609ce9aadd1a65da \ + --hash=sha256:90ba6f209b8e91d6e53390af37bcbde11d2b2e38bd7383b7934a7301e5d36775 \ + --hash=sha256:94de42fc08dfb4fcef800d1d974609a25a842983543f96f97440ab2694c8763a \ + --hash=sha256:b3dc9e4ff47941f9ca74789bfcc3340c55804b492a71f6bc532c7a52b739f2b4 \ + --hash=sha256:b4be72425c887537f224c45118713cc49f996f41e7e684b74a07379ac07dd7a5 \ + --hash=sha256:b60067157956dca4fee0ae619b2eadd3e79cfceb30843cab2e0e07b0d294759e \ + --hash=sha256:b9d13b56d65984f752a6300f737d7907993b8248179cc389a2f8f6ebe24b8ec9 \ + --hash=sha256:bdd4b448c4dcaee76b649ee225ddfc03b613b2dbf611847b346d15f7823ab1e2 \ + --hash=sha256:c6ac87e3d0953218edbcf1f9c4aa9b59aca83aa383f0cc4f0bb2343e39253026 \ + --hash=sha256:e0064a8d4cc88161cd2378cf102fe3453503f116ab6e4932c5f74108aba072ee +python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \ --hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427 -pytz==2025.1 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57 \ - --hash=sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e -referencing==0.36.2 ; python_version >= "3.9" and python_version < "3.15" \ +pytz==2025.2 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3 \ + --hash=sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00 +referencing==0.36.2 ; python_version >= "3.10" and 
python_version < "4" \ --hash=sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa \ --hash=sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0 -rpds-py==0.23.1 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:09cd7dbcb673eb60518231e02874df66ec1296c01a4fcd733875755c02014b19 \ - --hash=sha256:0f3288930b947cbebe767f84cf618d2cbe0b13be476e749da0e6a009f986248c \ - --hash=sha256:0fced9fd4a07a1ded1bac7e961ddd9753dd5d8b755ba8e05acba54a21f5f1522 \ - --hash=sha256:112b8774b0b4ee22368fec42749b94366bd9b536f8f74c3d4175d4395f5cbd31 \ - --hash=sha256:11dd60b2ffddba85715d8a66bb39b95ddbe389ad2cfcf42c833f1bcde0878eaf \ - --hash=sha256:178f8a60fc24511c0eb756af741c476b87b610dba83270fce1e5a430204566a4 \ - --hash=sha256:1b08027489ba8fedde72ddd233a5ea411b85a6ed78175f40285bd401bde7466d \ - --hash=sha256:1bf5be5ba34e19be579ae873da515a2836a2166d8d7ee43be6ff909eda42b72b \ - --hash=sha256:1ed7de3c86721b4e83ac440751329ec6a1102229aa18163f84c75b06b525ad7e \ - --hash=sha256:1eedaaccc9bb66581d4ae7c50e15856e335e57ef2734dbc5fd8ba3e2a4ab3cb6 \ - --hash=sha256:243241c95174b5fb7204c04595852fe3943cc41f47aa14c3828bc18cd9d3b2d6 \ - --hash=sha256:26bb3e8de93443d55e2e748e9fd87deb5f8075ca7bc0502cfc8be8687d69a2ec \ - --hash=sha256:271fa2184cf28bdded86bb6217c8e08d3a169fe0bbe9be5e8d96e8476b707122 \ - --hash=sha256:28358c54fffadf0ae893f6c1050e8f8853e45df22483b7fff2f6ab6152f5d8bf \ - --hash=sha256:285019078537949cecd0190f3690a0b0125ff743d6a53dfeb7a4e6787af154f5 \ - --hash=sha256:2893d778d4671ee627bac4037a075168b2673c57186fb1a57e993465dbd79a93 \ - --hash=sha256:2a54027554ce9b129fc3d633c92fa33b30de9f08bc61b32c053dc9b537266fed \ - --hash=sha256:2c6ae11e6e93728d86aafc51ced98b1658a0080a7dd9417d24bfb955bb09c3c2 \ - --hash=sha256:2cfa07c346a7ad07019c33fb9a63cf3acb1f5363c33bc73014e20d9fe8b01cdd \ - --hash=sha256:35d5631ce0af26318dba0ae0ac941c534453e42f569011585cb323b7774502a5 \ - --hash=sha256:3614d280bf7aab0d3721b5ce0e73434acb90a2c993121b6e81a1c15c665298ac \ 
- --hash=sha256:3902df19540e9af4cc0c3ae75974c65d2c156b9257e91f5101a51f99136d834c \ - --hash=sha256:3aaf141d39f45322e44fc2c742e4b8b4098ead5317e5f884770c8df0c332da70 \ - --hash=sha256:3d8abf7896a91fb97e7977d1aadfcc2c80415d6dc2f1d0fca5b8d0df247248f3 \ - --hash=sha256:3e77febf227a1dc3220159355dba68faa13f8dca9335d97504abf428469fb18b \ - --hash=sha256:3e9212f52074fc9d72cf242a84063787ab8e21e0950d4d6709886fb62bcb91d5 \ - --hash=sha256:3ee9d6f0b38efb22ad94c3b68ffebe4c47865cdf4b17f6806d6c674e1feb4246 \ - --hash=sha256:4233df01a250b3984465faed12ad472f035b7cd5240ea3f7c76b7a7016084495 \ - --hash=sha256:4263320ed887ed843f85beba67f8b2d1483b5947f2dc73a8b068924558bfeace \ - --hash=sha256:4ab923167cfd945abb9b51a407407cf19f5bee35001221f2911dc85ffd35ff4f \ - --hash=sha256:4caafd1a22e5eaa3732acb7672a497123354bef79a9d7ceed43387d25025e935 \ - --hash=sha256:50fb62f8d8364978478b12d5f03bf028c6bc2af04082479299139dc26edf4c64 \ - --hash=sha256:55ff4151cfd4bc635e51cfb1c59ac9f7196b256b12e3a57deb9e5742e65941ad \ - --hash=sha256:5b98b6c953e5c2bda51ab4d5b4f172617d462eebc7f4bfdc7c7e6b423f6da957 \ - --hash=sha256:5c9ff044eb07c8468594d12602291c635da292308c8c619244e30698e7fc455a \ - --hash=sha256:5e9c206a1abc27e0588cf8b7c8246e51f1a16a103734f7750830a1ccb63f557a \ - --hash=sha256:5fb89edee2fa237584e532fbf78f0ddd1e49a47c7c8cfa153ab4849dc72a35e6 \ - --hash=sha256:633462ef7e61d839171bf206551d5ab42b30b71cac8f10a64a662536e057fdef \ - --hash=sha256:66f8d2a17e5838dd6fb9be6baaba8e75ae2f5fa6b6b755d597184bfcd3cb0eba \ - --hash=sha256:6959bb9928c5c999aba4a3f5a6799d571ddc2c59ff49917ecf55be2bbb4e3722 \ - --hash=sha256:698a79d295626ee292d1730bc2ef6e70a3ab135b1d79ada8fde3ed0047b65a10 \ - --hash=sha256:721f9c4011b443b6e84505fc00cc7aadc9d1743f1c988e4c89353e19c4a968ee \ - --hash=sha256:72e680c1518733b73c994361e4b06441b92e973ef7d9449feec72e8ee4f713da \ - --hash=sha256:75307599f0d25bf6937248e5ac4e3bde5ea72ae6618623b86146ccc7845ed00b \ - --hash=sha256:754fba3084b70162a6b91efceee8a3f06b19e43dac3f71841662053c0584209a \ - 
--hash=sha256:759462b2d0aa5a04be5b3e37fb8183615f47014ae6b116e17036b131985cb731 \ - --hash=sha256:7938c7b0599a05246d704b3f5e01be91a93b411d0d6cc62275f025293b8a11ce \ - --hash=sha256:7b77e07233925bd33fc0022b8537774423e4c6680b6436316c5075e79b6384f4 \ - --hash=sha256:7e5413d2e2d86025e73f05510ad23dad5950ab8417b7fc6beaad99be8077138b \ - --hash=sha256:7f3240dcfa14d198dba24b8b9cb3b108c06b68d45b7babd9eefc1038fdf7e707 \ - --hash=sha256:7f9682a8f71acdf59fd554b82b1c12f517118ee72c0f3944eda461606dfe7eb9 \ - --hash=sha256:8d67beb6002441faef8251c45e24994de32c4c8686f7356a1f601ad7c466f7c3 \ - --hash=sha256:9441af1d25aed96901f97ad83d5c3e35e6cd21a25ca5e4916c82d7dd0490a4fa \ - --hash=sha256:98b257ae1e83f81fb947a363a274c4eb66640212516becaff7bef09a5dceacaa \ - --hash=sha256:9e9f3a3ac919406bc0414bbbd76c6af99253c507150191ea79fab42fdb35982a \ - --hash=sha256:a1c66e71ecfd2a4acf0e4bd75e7a3605afa8f9b28a3b497e4ba962719df2be57 \ - --hash=sha256:a1e17d8dc8e57d8e0fd21f8f0f0a5211b3fa258b2e444c2053471ef93fe25a00 \ - --hash=sha256:a20cb698c4a59c534c6701b1c24a968ff2768b18ea2991f886bd8985ce17a89f \ - --hash=sha256:a970bfaf130c29a679b1d0a6e0f867483cea455ab1535fb427566a475078f27f \ - --hash=sha256:a98f510d86f689fcb486dc59e6e363af04151e5260ad1bdddb5625c10f1e95f8 \ - --hash=sha256:a9d3b728f5a5873d84cba997b9d617c6090ca5721caaa691f3b1a78c60adc057 \ - --hash=sha256:ad76f44f70aac3a54ceb1813ca630c53415da3a24fd93c570b2dfb4856591017 \ - --hash=sha256:ae28144c1daa61366205d32abd8c90372790ff79fc60c1a8ad7fd3c8553a600e \ - --hash=sha256:b03a8d50b137ee758e4c73638b10747b7c39988eb8e6cd11abb7084266455165 \ - --hash=sha256:b5a96fcac2f18e5a0a23a75cd27ce2656c66c11c127b0318e508aab436b77428 \ - --hash=sha256:b5ef909a37e9738d146519657a1aab4584018746a18f71c692f2f22168ece40c \ - --hash=sha256:b79f5ced71efd70414a9a80bbbfaa7160da307723166f09b69773153bf17c590 \ - --hash=sha256:b91cceb5add79ee563bd1f70b30896bd63bc5f78a11c1f00a1e931729ca4f1f4 \ - --hash=sha256:b92f5654157de1379c509b15acec9d12ecf6e3bc1996571b6cb82a4302060447 \ - 
--hash=sha256:c04ca91dda8a61584165825907f5c967ca09e9c65fe8966ee753a3f2b019fe1e \ - --hash=sha256:c1f8afa346ccd59e4e5630d5abb67aba6a9812fddf764fd7eb11f382a345f8cc \ - --hash=sha256:c5334a71f7dc1160382d45997e29f2637c02f8a26af41073189d79b95d3321f1 \ - --hash=sha256:c617d7453a80e29d9973b926983b1e700a9377dbe021faa36041c78537d7b08c \ - --hash=sha256:c632419c3870507ca20a37c8f8f5352317aca097639e524ad129f58c125c61c6 \ - --hash=sha256:c6760211eee3a76316cf328f5a8bd695b47b1626d21c8a27fb3b2473a884d597 \ - --hash=sha256:c698d123ce5d8f2d0cd17f73336615f6a2e3bdcedac07a1291bb4d8e7d82a05a \ - --hash=sha256:c76b32eb2ab650a29e423525e84eb197c45504b1c1e6e17b6cc91fcfeb1a4b1d \ - --hash=sha256:c8f7e90b948dc9dcfff8003f1ea3af08b29c062f681c05fd798e36daa3f7e3e8 \ - --hash=sha256:c9e799dac1ffbe7b10c1fd42fe4cd51371a549c6e108249bde9cd1200e8f59b4 \ - --hash=sha256:cafa48f2133d4daa028473ede7d81cd1b9f9e6925e9e4003ebdf77010ee02f35 \ - --hash=sha256:ce473a2351c018b06dd8d30d5da8ab5a0831056cc53b2006e2a8028172c37ce5 \ - --hash=sha256:d31ed4987d72aabdf521eddfb6a72988703c091cfc0064330b9e5f8d6a042ff5 \ - --hash=sha256:d550d7e9e7d8676b183b37d65b5cd8de13676a738973d330b59dc8312df9c5dc \ - --hash=sha256:d6adb81564af0cd428910f83fa7da46ce9ad47c56c0b22b50872bc4515d91966 \ - --hash=sha256:d6f6512a90bd5cd9030a6237f5346f046c6f0e40af98657568fa45695d4de59d \ - --hash=sha256:d7031d493c4465dbc8d40bd6cafefef4bd472b17db0ab94c53e7909ee781b9ef \ - --hash=sha256:d9f75a06ecc68f159d5d7603b734e1ff6daa9497a929150f794013aa9f6e3f12 \ - --hash=sha256:db7707dde9143a67b8812c7e66aeb2d843fe33cc8e374170f4d2c50bd8f2472d \ - --hash=sha256:e0397dd0b3955c61ef9b22838144aa4bef6f0796ba5cc8edfc64d468b93798b4 \ - --hash=sha256:e0df046f2266e8586cf09d00588302a32923eb6386ced0ca5c9deade6af9a149 \ - --hash=sha256:e14f86b871ea74c3fddc9a40e947d6a5d09def5adc2076ee61fb910a9014fb35 \ - --hash=sha256:e5963ea87f88bddf7edd59644a35a0feecf75f8985430124c253612d4f7d27ae \ - --hash=sha256:e768267cbe051dd8d1c5305ba690bb153204a09bf2e3de3ae530de955f5b5580 \ - 
--hash=sha256:e9cb79ecedfc156c0692257ac7ed415243b6c35dd969baa461a6888fc79f2f07 \ - --hash=sha256:ed6f011bedca8585787e5082cce081bac3d30f54520097b2411351b3574e1219 \ - --hash=sha256:f3429fb8e15b20961efca8c8b21432623d85db2228cc73fe22756c6637aa39e7 \ - --hash=sha256:f35eff113ad430b5272bbfc18ba111c66ff525828f24898b4e146eb479a2cdda \ - --hash=sha256:f3a6cb95074777f1ecda2ca4fa7717caa9ee6e534f42b7575a8f0d4cb0c24013 \ - --hash=sha256:f7356a6da0562190558c4fcc14f0281db191cdf4cb96e7604c06acfcee96df15 \ - --hash=sha256:f88626e3f5e57432e6191cd0c5d6d6b319b635e70b40be2ffba713053e5147dd \ - --hash=sha256:fad784a31869747df4ac968a351e070c06ca377549e4ace94775aaa3ab33ee06 \ - --hash=sha256:fc869af5cba24d45fb0399b0cfdbcefcf6910bf4dee5d74036a57cf5264b3ff4 \ - --hash=sha256:fee513135b5a58f3bb6d89e48326cd5aa308e4bcdf2f7d59f67c861ada482bf8 -ruff==0.4.10 ; python_version >= "3.9" and python_version < "3.15" \ +rpds-py==0.24.0 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:0047638c3aa0dbcd0ab99ed1e549bbf0e142c9ecc173b6492868432d8989a046 \ + --hash=sha256:006f4342fe729a368c6df36578d7a348c7c716be1da0a1a0f86e3021f8e98724 \ + --hash=sha256:041f00419e1da7a03c46042453598479f45be3d787eb837af382bfc169c0db33 \ + --hash=sha256:04ecf5c1ff4d589987b4d9882872f80ba13da7d42427234fce8f22efb43133bc \ + --hash=sha256:04f2b712a2206e13800a8136b07aaedc23af3facab84918e7aa89e4be0260032 \ + --hash=sha256:0aeb3329c1721c43c58cae274d7d2ca85c1690d89485d9c63a006cb79a85771a \ + --hash=sha256:0e374c0ce0ca82e5b67cd61fb964077d40ec177dd2c4eda67dba130de09085c7 \ + --hash=sha256:0f00c16e089282ad68a3820fd0c831c35d3194b7cdc31d6e469511d9bffc535c \ + --hash=sha256:174e46569968ddbbeb8a806d9922f17cd2b524aa753b468f35b97ff9c19cb718 \ + --hash=sha256:1b221c2457d92a1fb3c97bee9095c874144d196f47c038462ae6e4a14436f7bc \ + --hash=sha256:208b3a70a98cf3710e97cabdc308a51cd4f28aa6e7bb11de3d56cd8b74bab98d \ + --hash=sha256:20f2712bd1cc26a3cc16c5a1bfee9ed1abc33d4cdf1aabd297fe0eb724df4272 \ + 
--hash=sha256:24795c099453e3721fda5d8ddd45f5dfcc8e5a547ce7b8e9da06fecc3832e26f \ + --hash=sha256:2a0f156e9509cee987283abd2296ec816225145a13ed0391df8f71bf1d789e2d \ + --hash=sha256:2b2356688e5d958c4d5cb964af865bea84db29971d3e563fb78e46e20fe1848b \ + --hash=sha256:2c13777ecdbbba2077670285dd1fe50828c8742f6a4119dbef6f83ea13ad10fb \ + --hash=sha256:2d3ee4615df36ab8eb16c2507b11e764dcc11fd350bbf4da16d09cda11fcedef \ + --hash=sha256:2d53747da70a4e4b17f559569d5f9506420966083a31c5fbd84e764461c4444b \ + --hash=sha256:32bab0a56eac685828e00cc2f5d1200c548f8bc11f2e44abf311d6b548ce2e45 \ + --hash=sha256:34d90ad8c045df9a4259c47d2e16a3f21fdb396665c94520dbfe8766e62187a4 \ + --hash=sha256:369d9c6d4c714e36d4a03957b4783217a3ccd1e222cdd67d464a3a479fc17796 \ + --hash=sha256:3a55fc10fdcbf1a4bd3c018eea422c52cf08700cf99c28b5cb10fe97ab77a0d3 \ + --hash=sha256:3d2d8e4508e15fc05b31285c4b00ddf2e0eb94259c2dc896771966a163122a0c \ + --hash=sha256:3fab5f4a2c64a8fb64fc13b3d139848817a64d467dd6ed60dcdd6b479e7febc9 \ + --hash=sha256:43dba99f00f1d37b2a0265a259592d05fcc8e7c19d140fe51c6e6f16faabeb1f \ + --hash=sha256:44d51febb7a114293ffd56c6cf4736cb31cd68c0fddd6aa303ed09ea5a48e029 \ + --hash=sha256:493fe54318bed7d124ce272fc36adbf59d46729659b2c792e87c3b95649cdee9 \ + --hash=sha256:4b28e5122829181de1898c2c97f81c0b3246d49f585f22743a1246420bb8d399 \ + --hash=sha256:4cd031e63bc5f05bdcda120646a0d32f6d729486d0067f09d79c8db5368f4586 \ + --hash=sha256:528927e63a70b4d5f3f5ccc1fa988a35456eb5d15f804d276709c33fc2f19bda \ + --hash=sha256:564c96b6076a98215af52f55efa90d8419cc2ef45d99e314fddefe816bc24f91 \ + --hash=sha256:5db385bacd0c43f24be92b60c857cf760b7f10d8234f4bd4be67b5b20a7c0b6b \ + --hash=sha256:5ef877fa3bbfb40b388a5ae1cb00636a624690dcb9a29a65267054c9ea86d88a \ + --hash=sha256:5f6e3cec44ba05ee5cbdebe92d052f69b63ae792e7d05f1020ac5e964394080c \ + --hash=sha256:5fc13b44de6419d1e7a7e592a4885b323fbc2f46e1f22151e3a8ed3b8b920405 \ + --hash=sha256:60748789e028d2a46fc1c70750454f83c6bdd0d05db50f5ae83e2db500b34da5 \ + 
--hash=sha256:60d9b630c8025b9458a9d114e3af579a2c54bd32df601c4581bd054e85258143 \ + --hash=sha256:619ca56a5468f933d940e1bf431c6f4e13bef8e688698b067ae68eb4f9b30e3a \ + --hash=sha256:630d3d8ea77eabd6cbcd2ea712e1c5cecb5b558d39547ac988351195db433f6c \ + --hash=sha256:63981feca3f110ed132fd217bf7768ee8ed738a55549883628ee3da75bb9cb78 \ + --hash=sha256:66420986c9afff67ef0c5d1e4cdc2d0e5262f53ad11e4f90e5e22448df485bf0 \ + --hash=sha256:675269d407a257b8c00a6b58205b72eec8231656506c56fd429d924ca00bb350 \ + --hash=sha256:6a4a535013aeeef13c5532f802708cecae8d66c282babb5cd916379b72110cf7 \ + --hash=sha256:6a727fd083009bc83eb83d6950f0c32b3c94c8b80a9b667c87f4bd1274ca30ba \ + --hash=sha256:6e1daf5bf6c2be39654beae83ee6b9a12347cb5aced9a29eecf12a2d25fff664 \ + --hash=sha256:6eea559077d29486c68218178ea946263b87f1c41ae7f996b1f30a983c476a5a \ + --hash=sha256:75a810b7664c17f24bf2ffd7f92416c00ec84b49bb68e6a0d93e542406336b56 \ + --hash=sha256:772cc1b2cd963e7e17e6cc55fe0371fb9c704d63e44cacec7b9b7f523b78919e \ + --hash=sha256:78884d155fd15d9f64f5d6124b486f3d3f7fd7cd71a78e9670a0f6f6ca06fb2d \ + --hash=sha256:79e8d804c2ccd618417e96720ad5cd076a86fa3f8cb310ea386a3e6229bae7d1 \ + --hash=sha256:7e80d375134ddb04231a53800503752093dbb65dad8dabacce2c84cccc78e964 \ + --hash=sha256:8097b3422d020ff1c44effc40ae58e67d93e60d540a65649d2cdaf9466030791 \ + --hash=sha256:8205ee14463248d3349131bb8099efe15cd3ce83b8ef3ace63c7e976998e7124 \ + --hash=sha256:8212ff58ac6dfde49946bea57474a386cca3f7706fc72c25b772b9ca4af6b79e \ + --hash=sha256:823e74ab6fbaa028ec89615ff6acb409e90ff45580c45920d4dfdddb069f2120 \ + --hash=sha256:84e0566f15cf4d769dade9b366b7b87c959be472c92dffb70462dd0844d7cbad \ + --hash=sha256:896c41007931217a343eff197c34513c154267636c8056fb409eafd494c3dcdc \ + --hash=sha256:8aa362811ccdc1f8dadcc916c6d47e554169ab79559319ae9fae7d7752d0d60c \ + --hash=sha256:8b3b397eefecec8e8e39fa65c630ef70a24b09141a6f9fc17b3c3a50bed6b50e \ + --hash=sha256:8ebc7e65ca4b111d928b669713865f021b7773350eeac4a31d3e70144297baba \ + 
--hash=sha256:9168764133fd919f8dcca2ead66de0105f4ef5659cbb4fa044f7014bed9a1797 \ + --hash=sha256:921ae54f9ecba3b6325df425cf72c074cd469dea843fb5743a26ca7fb2ccb149 \ + --hash=sha256:92558d37d872e808944c3c96d0423b8604879a3d1c86fdad508d7ed91ea547d5 \ + --hash=sha256:951cc481c0c395c4a08639a469d53b7d4afa252529a085418b82a6b43c45c240 \ + --hash=sha256:998c01b8e71cf051c28f5d6f1187abbdf5cf45fc0efce5da6c06447cba997034 \ + --hash=sha256:9abc80fe8c1f87218db116016de575a7998ab1629078c90840e8d11ab423ee25 \ + --hash=sha256:9be4f99bee42ac107870c61dfdb294d912bf81c3c6d45538aad7aecab468b6b7 \ + --hash=sha256:9c39438c55983d48f4bb3487734d040e22dad200dab22c41e331cee145e7a50d \ + --hash=sha256:9d7e8ce990ae17dda686f7e82fd41a055c668e13ddcf058e7fb5e9da20b57793 \ + --hash=sha256:9ea7f4174d2e4194289cb0c4e172d83e79a6404297ff95f2875cf9ac9bced8ba \ + --hash=sha256:a18fc371e900a21d7392517c6f60fe859e802547309e94313cd8181ad9db004d \ + --hash=sha256:a36b452abbf29f68527cf52e181fced56685731c86b52e852053e38d8b60bc8d \ + --hash=sha256:a5b66d1b201cc71bc3081bc2f1fc36b0c1f268b773e03bbc39066651b9e18391 \ + --hash=sha256:a824d2c7a703ba6daaca848f9c3d5cb93af0505be505de70e7e66829affd676e \ + --hash=sha256:a88c0d17d039333a41d9bf4616bd062f0bd7aa0edeb6cafe00a2fc2a804e944f \ + --hash=sha256:aa6800adc8204ce898c8a424303969b7aa6a5e4ad2789c13f8648739830323b7 \ + --hash=sha256:aad911555286884be1e427ef0dc0ba3929e6821cbeca2194b13dc415a462c7fd \ + --hash=sha256:afc6e35f344490faa8276b5f2f7cbf71f88bc2cda4328e00553bd451728c571f \ + --hash=sha256:b9a4df06c35465ef4d81799999bba810c68d29972bf1c31db61bfdb81dd9d5bb \ + --hash=sha256:bb2954155bb8f63bb19d56d80e5e5320b61d71084617ed89efedb861a684baea \ + --hash=sha256:bbc4362e06f950c62cad3d4abf1191021b2ffaf0b31ac230fbf0526453eee75e \ + --hash=sha256:c0145295ca415668420ad142ee42189f78d27af806fcf1f32a18e51d47dd2052 \ + --hash=sha256:c30ff468163a48535ee7e9bf21bd14c7a81147c0e58a36c1078289a8ca7af0bd \ + --hash=sha256:c347a20d79cedc0a7bd51c4d4b7dbc613ca4e65a756b5c3e57ec84bd43505b47 \ + 
--hash=sha256:c43583ea8517ed2e780a345dd9960896afc1327e8cf3ac8239c167530397440d \ + --hash=sha256:c61a2cb0085c8783906b2f8b1f16a7e65777823c7f4d0a6aaffe26dc0d358dd9 \ + --hash=sha256:c9ca89938dff18828a328af41ffdf3902405a19f4131c88e22e776a8e228c5a8 \ + --hash=sha256:cc31e13ce212e14a539d430428cd365e74f8b2d534f8bc22dd4c9c55b277b875 \ + --hash=sha256:cdabcd3beb2a6dca7027007473d8ef1c3b053347c76f685f5f060a00327b8b65 \ + --hash=sha256:cf86f72d705fc2ef776bb7dd9e5fbba79d7e1f3e258bf9377f8204ad0fc1c51e \ + --hash=sha256:d09dc82af2d3c17e7dd17120b202a79b578d79f2b5424bda209d9966efeed114 \ + --hash=sha256:d3aa13bdf38630da298f2e0d77aca967b200b8cc1473ea05248f6c5e9c9bdb44 \ + --hash=sha256:d69d003296df4840bd445a5d15fa5b6ff6ac40496f956a221c4d1f6f7b4bc4d9 \ + --hash=sha256:d6e109a454412ab82979c5b1b3aee0604eca4bbf9a02693bb9df027af2bfa91a \ + --hash=sha256:d8551e733626afec514b5d15befabea0dd70a343a9f23322860c4f16a9430205 \ + --hash=sha256:d8754d872a5dfc3c5bf9c0e059e8107451364a30d9fd50f1f1a85c4fb9481164 \ + --hash=sha256:d8f9a6e7fd5434817526815f09ea27f2746c4a51ee11bb3439065f5fc754db58 \ + --hash=sha256:dbcbb6db5582ea33ce46a5d20a5793134b5365110d84df4e30b9d37c6fd40ad3 \ + --hash=sha256:e0f3ef95795efcd3b2ec3fe0a5bcfb5dadf5e3996ea2117427e524d4fbf309c6 \ + --hash=sha256:e13ae74a8a3a0c2f22f450f773e35f893484fcfacb00bb4344a7e0f4f48e1f97 \ + --hash=sha256:e274f62cbd274359eff63e5c7e7274c913e8e09620f6a57aae66744b3df046d6 \ + --hash=sha256:e838bf2bb0b91ee67bf2b889a1a841e5ecac06dd7a2b1ef4e6151e2ce155c7ae \ + --hash=sha256:e8acd55bd5b071156bae57b555f5d33697998752673b9de554dd82f5b5352727 \ + --hash=sha256:e8e5ab32cf9eb3647450bc74eb201b27c185d3857276162c101c0f8c6374e098 \ + --hash=sha256:ebcb786b9ff30b994d5969213a8430cbb984cdd7ea9fd6df06663194bd3c450c \ + --hash=sha256:ebea2821cdb5f9fef44933617be76185b80150632736f3d76e54829ab4a3b4d1 \ + --hash=sha256:ed0ef550042a8dbcd657dfb284a8ee00f0ba269d3f2286b0493b15a5694f9fe8 \ + --hash=sha256:eda5c1e2a715a4cbbca2d6d304988460942551e4e5e3b7457b50943cd741626d \ + 
--hash=sha256:f5c0ed12926dec1dfe7d645333ea59cf93f4d07750986a586f511c0bc61fe103 \ + --hash=sha256:f6016bd950be4dcd047b7475fdf55fb1e1f59fc7403f387be0e8123e4a576d30 \ + --hash=sha256:f9e0057a509e096e47c87f753136c9b10d7a91842d8042c2ee6866899a717c0d \ + --hash=sha256:fc1c892b1ec1f8cbd5da8de287577b455e388d9c328ad592eabbdcb6fc93bee5 \ + --hash=sha256:fc2c1e1b00f88317d9de6b2c2b39b012ebbfe35fe5e7bef980fd2a91f6100a07 \ + --hash=sha256:fd822f019ccccd75c832deb7aa040bb02d70a92eb15a2f16c7987b7ad4ee8d83 +ruff==0.4.10 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:0f54c481b39a762d48f64d97351048e842861c6662d63ec599f67d515cb417f6 \ --hash=sha256:18238c80ee3d9100d3535d8eb15a59c4a0753b45cc55f8bf38f38d6a597b9739 \ --hash=sha256:330421543bd3222cdfec481e8ff3460e8702ed1e58b494cf9d9e4bf90db52b9d \ @@ -362,7 +485,7 @@ ruff==0.4.10 ; python_version >= "3.9" and python_version < "3.15" \ --hash=sha256:d8f71885bce242da344989cae08e263de29752f094233f932d4f5cfb4ef36a81 \ --hash=sha256:dd1fcee327c20addac7916ca4e2653fbbf2e8388d8a6477ce5b4e986b68ae6c0 \ --hash=sha256:ffe3cd2f89cb54561c62e5fa20e8f182c0a444934bf430515a4b422f1ab7b7ca -scikit-learn==1.6.1 ; python_version >= "3.9" and python_version < "3.15" \ +scikit-learn==1.6.1 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:0650e730afb87402baa88afbf31c07b84c98272622aaba002559b614600ca691 \ --hash=sha256:0c8d036eb937dbb568c6242fa598d551d88fb4399c0344d95c001980ec1c7d36 \ --hash=sha256:1061b7c028a8663fb9a1a1baf9317b64a257fcb036dae5c8752b2abef31d136f \ @@ -393,39 +516,63 @@ scikit-learn==1.6.1 ; python_version >= "3.9" and python_version < "3.15" \ --hash=sha256:e7be3fa5d2eb9be7d77c3734ff1d599151bb523674be9b834e8da6abe132f44e \ --hash=sha256:e8ca8cb270fee8f1f76fa9bfd5c3507d60c6438bbee5687f81042e2bb98e5a97 \ --hash=sha256:fa909b1a36e000a03c382aade0bd2063fd5680ff8b8e501660c0f59f021a6415 -scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.15" \ - 
--hash=sha256:017367484ce5498445aade74b1d5ab377acdc65e27095155e448c88497755a5d \ - --hash=sha256:095a87a0312b08dfd6a6155cbbd310a8c51800fc931b8c0b84003014b874ed3c \ - --hash=sha256:20335853b85e9a49ff7572ab453794298bcf0354d8068c5f6775a0eabf350aca \ - --hash=sha256:27e52b09c0d3a1d5b63e1105f24177e544a222b43611aaf5bc44d4a0979e32f9 \ - --hash=sha256:2831f0dc9c5ea9edd6e51e6e769b655f08ec6db6e2e10f86ef39bd32eb11da54 \ - --hash=sha256:2ac65fb503dad64218c228e2dc2d0a0193f7904747db43014645ae139c8fad16 \ - --hash=sha256:392e4ec766654852c25ebad4f64e4e584cf19820b980bc04960bca0b0cd6eaa2 \ - --hash=sha256:436bbb42a94a8aeef855d755ce5a465479c721e9d684de76bf61a62e7c2b81d5 \ - --hash=sha256:45484bee6d65633752c490404513b9ef02475b4284c4cfab0ef946def50b3f59 \ - --hash=sha256:54f430b00f0133e2224c3ba42b805bfd0086fe488835effa33fa291561932326 \ - --hash=sha256:5713f62f781eebd8d597eb3f88b8bf9274e79eeabf63afb4a737abc6c84ad37b \ - --hash=sha256:5d72782f39716b2b3509cd7c33cdc08c96f2f4d2b06d51e52fb45a19ca0c86a1 \ - --hash=sha256:637e98dcf185ba7f8e663e122ebf908c4702420477ae52a04f9908707456ba4d \ - --hash=sha256:8335549ebbca860c52bf3d02f80784e91a004b71b059e3eea9678ba994796a24 \ - --hash=sha256:949ae67db5fa78a86e8fa644b9a6b07252f449dcf74247108c50e1d20d2b4627 \ - --hash=sha256:a014c2b3697bde71724244f63de2476925596c24285c7a637364761f8710891c \ - --hash=sha256:a78b4b3345f1b6f68a763c6e25c0c9a23a9fd0f39f5f3d200efe8feda560a5fa \ - --hash=sha256:cdd7dacfb95fea358916410ec61bbc20440f7860333aee6d882bb8046264e949 \ - --hash=sha256:cfa31f1def5c819b19ecc3a8b52d28ffdcc7ed52bb20c9a7589669dd3c250989 \ - --hash=sha256:d533654b7d221a6a97304ab63c41c96473ff04459e404b83275b60aa8f4b7004 \ - --hash=sha256:d605e9c23906d1994f55ace80e0125c587f96c020037ea6aa98d01b4bd2e222f \ - --hash=sha256:de3ade0e53bc1f21358aa74ff4830235d716211d7d077e340c7349bc3542e884 \ - --hash=sha256:e89369d27f9e7b0884ae559a3a956e77c02114cc60a6058b4e5011572eea9299 \ - --hash=sha256:eccfa1906eacc02de42d70ef4aecea45415f5be17e72b61bafcfd329bdc52e94 \ - 
--hash=sha256:f26264b282b9da0952a024ae34710c2aff7d27480ee91a2e82b7b7073c24722f -six==1.17.0 ; python_version >= "3.9" and python_version < "3.15" \ +scipy==1.15.2 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:01edfac9f0798ad6b46d9c4c9ca0e0ad23dbf0b1eb70e96adb9fa7f525eff0bf \ + --hash=sha256:03205d57a28e18dfd39f0377d5002725bf1f19a46f444108c29bdb246b6c8a11 \ + --hash=sha256:08b57a9336b8e79b305a143c3655cc5bdbe6d5ece3378578888d2afbb51c4e37 \ + --hash=sha256:11e7ad32cf184b74380f43d3c0a706f49358b904fa7d5345f16ddf993609184d \ + --hash=sha256:28a0d2c2075946346e4408b211240764759e0fabaeb08d871639b5f3b1aca8a0 \ + --hash=sha256:2b871df1fe1a3ba85d90e22742b93584f8d2b8e6124f8372ab15c71b73e428b8 \ + --hash=sha256:302093e7dfb120e55515936cb55618ee0b895f8bcaf18ff81eca086c17bd80af \ + --hash=sha256:42dabaaa798e987c425ed76062794e93a243be8f0f20fff6e7a89f4d61cb3d40 \ + --hash=sha256:447ce30cee6a9d5d1379087c9e474628dab3db4a67484be1b7dc3196bfb2fac9 \ + --hash=sha256:4c6676490ad76d1c2894d77f976144b41bd1a4052107902238047fb6a473e971 \ + --hash=sha256:54c462098484e7466362a9f1672d20888f724911a74c22ae35b61f9c5919183d \ + --hash=sha256:597a0c7008b21c035831c39927406c6181bcf8f60a73f36219b69d010aa04737 \ + --hash=sha256:5a6fd6eac1ce74a9f77a7fc724080d507c5812d61e72bd5e4c489b042455865e \ + --hash=sha256:5ea7ed46d437fc52350b028b1d44e002646e28f3e8ddc714011aaf87330f2f32 \ + --hash=sha256:601881dfb761311045b03114c5fe718a12634e5608c3b403737ae463c9885d53 \ + --hash=sha256:62ca1ff3eb513e09ed17a5736929429189adf16d2d740f44e53270cc800ecff1 \ + --hash=sha256:69ea6e56d00977f355c0f84eba69877b6df084516c602d93a33812aa04d90a3d \ + --hash=sha256:6a8e34cf4c188b6dd004654f88586d78f95639e48a25dfae9c5e34a6dc34547e \ + --hash=sha256:6d0194c37037707b2afa7a2f2a924cf7bac3dc292d51b6a925e5fcb89bc5c776 \ + --hash=sha256:6f223753c6ea76983af380787611ae1291e3ceb23917393079dcc746ba60cfb5 \ + --hash=sha256:6f5e296ec63c5da6ba6fa0343ea73fd51b8b3e1a300b0a8cae3ed4b1122c7462 \ + 
--hash=sha256:7cd5b77413e1855351cdde594eca99c1f4a588c2d63711388b6a1f1c01f62274 \ + --hash=sha256:869269b767d5ee7ea6991ed7e22b3ca1f22de73ab9a49c44bad338b725603301 \ + --hash=sha256:87994da02e73549dfecaed9e09a4f9d58a045a053865679aeb8d6d43747d4df3 \ + --hash=sha256:888307125ea0c4466287191e5606a2c910963405ce9671448ff9c81c53f85f58 \ + --hash=sha256:92233b2df6938147be6fa8824b8136f29a18f016ecde986666be5f4d686a91a4 \ + --hash=sha256:9412f5e408b397ff5641080ed1e798623dbe1ec0d78e72c9eca8992976fa65aa \ + --hash=sha256:9b18aa747da280664642997e65aab1dd19d0c3d17068a04b3fe34e2559196cb9 \ + --hash=sha256:9de9d1416b3d9e7df9923ab23cd2fe714244af10b763975bea9e4f2e81cebd27 \ + --hash=sha256:a2ec871edaa863e8213ea5df811cd600734f6400b4af272e1c011e69401218e9 \ + --hash=sha256:a5080a79dfb9b78b768cebf3c9dcbc7b665c5875793569f48bf0e2b1d7f68f6f \ + --hash=sha256:a8bf5cb4a25046ac61d38f8d3c3426ec11ebc350246a4642f2f315fe95bda655 \ + --hash=sha256:b09ae80010f52efddb15551025f9016c910296cf70adbf03ce2a8704f3a5ad20 \ + --hash=sha256:b5e025e903b4f166ea03b109bb241355b9c42c279ea694d8864d033727205e65 \ + --hash=sha256:bad78d580270a4d32470563ea86c6590b465cb98f83d760ff5b0990cb5518a93 \ + --hash=sha256:bae43364d600fdc3ac327db99659dcb79e6e7ecd279a75fe1266669d9a652828 \ + --hash=sha256:c4697a10da8f8765bb7c83e24a470da5797e37041edfd77fd95ba3811a47c4fd \ + --hash=sha256:c90ebe8aaa4397eaefa8455a8182b164a6cc1d59ad53f79943f266d99f68687f \ + --hash=sha256:cd58a314d92838f7e6f755c8a2167ead4f27e1fd5c1251fd54289569ef3495ec \ + --hash=sha256:cf72ff559a53a6a6d77bd8eefd12a17995ffa44ad86c77a5df96f533d4e6c6bb \ + --hash=sha256:def751dd08243934c884a3221156d63e15234a3155cf25978b0a668409d45eb6 \ + --hash=sha256:e7c68b6a43259ba0aab737237876e5c2c549a031ddb7abc28c7b47f22e202ded \ + --hash=sha256:ecf797d2d798cf7c838c6d98321061eb3e72a74710e6c40540f0e8087e3b499e \ + --hash=sha256:f031846580d9acccd0044efd1a90e6f4df3a6e12b4b6bd694a7bc03a89892b28 \ + --hash=sha256:fb530e4794fc8ea76a4a21ccb67dea33e5e0e60f07fc38a49e821e1eae3b71a0 \ + 
--hash=sha256:fe8a9eb875d430d81755472c5ba75e84acc980e4a8f6204d402849234d3017db +six==1.17.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \ --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81 -threadpoolctl==3.5.0 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107 \ - --hash=sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467 -tomli==2.2.1 ; python_version >= "3.9" and python_version < "3.15" \ +stack-data==0.6.3 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9 \ + --hash=sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695 +threadpoolctl==3.6.0 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb \ + --hash=sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e +tomli==2.2.1 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \ --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \ --hash=sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c \ @@ -458,21 +605,27 @@ tomli==2.2.1 ; python_version >= "3.9" and python_version < "3.15" \ --hash=sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272 \ --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 -tomlkit==0.13.2 ; python_version >= "3.9" and python_version < "3.15" \ +tomlkit==0.13.2 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde \ 
--hash=sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79 -typing-extensions==4.12.2 ; python_version >= "3.9" and python_version < "3.13" \ - --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ - --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 -tzdata==2025.1 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694 \ - --hash=sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639 -virtualenv==20.29.3 ; python_version >= "3.9" and python_version < "3.15" \ - --hash=sha256:3e3d00f5807e83b234dfb6122bf37cfadf4be216c53a49ac059d02414f819170 \ - --hash=sha256:95e39403fcf3940ac45bc717597dba16110b74506131845d9b687d5e73d947ac -xmltodict==0.14.2 ; python_version >= "3.9" and python_version < "3.15" \ +traitlets==5.14.3 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7 \ + --hash=sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f +typing-extensions==4.13.1 ; python_version >= "3.10" and python_version < "3.13" \ + --hash=sha256:4b6cf02909eb5495cfbc3f6e8fd49217e6cc7944e145cdda8caa3734777f9e69 \ + --hash=sha256:98795af00fb9640edec5b8e31fc647597b4691f099ad75f469a2616be1a76dff +tzdata==2025.2 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8 \ + --hash=sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9 +virtualenv==20.30.0 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:800863162bcaa5450a6e4d721049730e7f2dae07720e0902b0e4040bd6f9ada8 \ + --hash=sha256:e34302959180fca3af42d1800df014b35019490b119eba981af27f2fa486e5d6 +wcwidth==0.2.13 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859 \ + 
--hash=sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5 +xmltodict==0.14.2 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:201e7c28bb210e374999d1dde6382923ab0ed1a8a5faeece48ab525b7810a553 \ --hash=sha256:20cc7d723ed729276e808f26fb6b3599f786cbc37e06c65e192ba77c40f20aac -zipp==3.21.0 ; python_version >= "3.9" and python_full_version < "3.10.2" \ +zipp==3.21.0 ; python_version >= "3.10" and python_full_version < "3.10.2" \ --hash=sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4 \ --hash=sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931 diff --git a/doc/source/dev-environment.rst b/doc/source/dev-environment.rst index 6c36101ac..0adb3c83e 100644 --- a/doc/source/dev-environment.rst +++ b/doc/source/dev-environment.rst @@ -27,8 +27,8 @@ Steps with :bash:`sudo` access (e.g. on a local device): * After installation, restart your shell. #. Install the required Python versions: * On some systems, additional packages may be needed to build Python versions. For example on Ubuntu: :bash:`sudo apt install build-essential zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev libssl-dev libreadline-dev libffi-dev libsqlite3-dev wget libbz2-dev liblzma-dev lzma`. - * Install the Python versions with: :bash:`pyenv install 3.9 3.10 3.11 3.12`. The reason we're installing all these versions as opposed to just one, is so we can test against all supported Python versions. -#. Set the Python versions so they can be found: :bash:`pyenv local 3.9 3.10 3.11 3.12` (replace :bash:`local` with :bash:`global` when not using the virtualenv). + * Install the Python versions with: :bash:`pyenv install 3.9 3.10 3.11 3.12 3.13`. The reason we're installing all these versions as opposed to just one, is so we can test against all supported Python versions. +#. 
Set the Python versions so they can be found: :bash:`pyenv local 3.9 3.10 3.11 3.12 3.13` (replace :bash:`local` with :bash:`global` when not using the virtualenv). #. Setup a local virtual environment in the folder: :bash:`pyenv virtualenv 3.11 kerneltuner` (or whatever environment name and Python version you prefer). #. `Install Poetry `__. * Use :bash:`curl -sSL https://install.python-poetry.org | python3 -` to install Poetry. @@ -78,7 +78,7 @@ Steps without :bash:`sudo` access (e.g. on a cluster): * Verify that your development environment has no missing installs or updates with :bash:`poetry install --sync --dry-run --with test`. #. Check if the environment is setup correctly by running :bash:`pytest`. All tests should pass, except if you're not on a GPU node, or one or more extras has been left out in the previous step, then these tests will skip gracefully. #. Set Nox to use the correct backend and location: - * Run :bash:`conda -- create-settings-file` to automatically create a settings file. + * Run :bash:`nox -- create-settings-file` to automatically create a settings file. * In this settings file :bash:`noxsettings.toml`, change the :bash:`venvbackend`: * If you used Mamba in step 2, to :bash:`mamba`. * If you used Miniconda or Anaconda in step 2, to :bash:`conda`. 
diff --git a/examples/c/vector_add.py b/examples/c/vector_add.py old mode 100755 new mode 100644 diff --git a/examples/cuda-c++/vector_add.py b/examples/cuda-c++/vector_add.py old mode 100755 new mode 100644 diff --git a/examples/cuda-c++/vector_add_blocksize.py b/examples/cuda-c++/vector_add_blocksize.py old mode 100755 new mode 100644 diff --git a/examples/cuda-c++/vector_add_cupy.py b/examples/cuda-c++/vector_add_cupy.py old mode 100755 new mode 100644 diff --git a/examples/cuda/convolution.py b/examples/cuda/convolution.py old mode 100755 new mode 100644 diff --git a/examples/cuda/convolution_correct.py b/examples/cuda/convolution_correct.py old mode 100755 new mode 100644 diff --git a/examples/cuda/convolution_streams.py b/examples/cuda/convolution_streams.py old mode 100755 new mode 100644 diff --git a/examples/cuda/expdist.py b/examples/cuda/expdist.py old mode 100755 new mode 100644 diff --git a/examples/cuda/matmul.py b/examples/cuda/matmul.py old mode 100755 new mode 100644 diff --git a/examples/cuda/pnpoly.py b/examples/cuda/pnpoly.py old mode 100755 new mode 100644 diff --git a/examples/cuda/python_kernel.py b/examples/cuda/python_kernel.py old mode 100755 new mode 100644 diff --git a/examples/cuda/reduction.py b/examples/cuda/reduction.py old mode 100755 new mode 100644 diff --git a/examples/cuda/sepconv.py b/examples/cuda/sepconv.py old mode 100755 new mode 100644 diff --git a/examples/cuda/spmv.py b/examples/cuda/spmv.py old mode 100755 new mode 100644 diff --git a/examples/cuda/stencil.py b/examples/cuda/stencil.py old mode 100755 new mode 100644 diff --git a/examples/cuda/test_vector_add.py b/examples/cuda/test_vector_add.py old mode 100755 new mode 100644 diff --git a/examples/cuda/test_vector_add_parameterized.py b/examples/cuda/test_vector_add_parameterized.py old mode 100755 new mode 100644 diff --git a/examples/cuda/vector_add.py b/examples/cuda/vector_add.py old mode 100755 new mode 100644 diff --git a/examples/cuda/vector_add_codegen.py 
b/examples/cuda/vector_add_codegen.py old mode 100755 new mode 100644 diff --git a/examples/cuda/vector_add_cupy.py b/examples/cuda/vector_add_cupy.py old mode 100755 new mode 100644 diff --git a/examples/cuda/vector_add_jinja.py b/examples/cuda/vector_add_jinja.py old mode 100755 new mode 100644 diff --git a/examples/cuda/vector_add_metric.py b/examples/cuda/vector_add_metric.py old mode 100755 new mode 100644 diff --git a/examples/cuda/vector_add_observers.py b/examples/cuda/vector_add_observers.py old mode 100755 new mode 100644 diff --git a/examples/cuda/zeromeanfilter.py b/examples/cuda/zeromeanfilter.py old mode 100755 new mode 100644 diff --git a/examples/fortran/vector_add.py b/examples/fortran/vector_add.py old mode 100755 new mode 100644 diff --git a/examples/opencl/convolution.py b/examples/opencl/convolution.py old mode 100755 new mode 100644 diff --git a/examples/opencl/convolution_correct.py b/examples/opencl/convolution_correct.py old mode 100755 new mode 100644 diff --git a/examples/opencl/matmul.py b/examples/opencl/matmul.py old mode 100755 new mode 100644 diff --git a/examples/opencl/reduction.py b/examples/opencl/reduction.py old mode 100755 new mode 100644 diff --git a/examples/opencl/sepconv.py b/examples/opencl/sepconv.py old mode 100755 new mode 100644 diff --git a/examples/opencl/stencil.py b/examples/opencl/stencil.py old mode 100755 new mode 100644 diff --git a/examples/opencl/vector_add.py b/examples/opencl/vector_add.py old mode 100755 new mode 100644 diff --git a/examples/opencl/vector_add_codegen.py b/examples/opencl/vector_add_codegen.py old mode 100755 new mode 100644 diff --git a/examples/opencl/vector_add_observers.py b/examples/opencl/vector_add_observers.py old mode 100755 new mode 100644 diff --git a/kernel_tuner/__init__.py b/kernel_tuner/__init__.py index b64d69813..40b88d463 100644 --- a/kernel_tuner/__init__.py +++ b/kernel_tuner/__init__.py @@ -1,5 +1,5 @@ from kernel_tuner.integration import store_results, 
create_device_targets -from kernel_tuner.interface import tune_kernel, run_kernel +from kernel_tuner.interface import tune_kernel, tune_kernel_T1, run_kernel from importlib.metadata import version diff --git a/kernel_tuner/backends/backend.py b/kernel_tuner/backends/backend.py index 586c3204f..6063dbb43 100644 --- a/kernel_tuner/backends/backend.py +++ b/kernel_tuner/backends/backend.py @@ -1,16 +1,16 @@ -"""This module contains the interface of all kernel_tuner backends""" +"""This module contains the interface of all kernel_tuner backends.""" from __future__ import print_function from abc import ABC, abstractmethod class Backend(ABC): - """Base class for kernel_tuner backends""" + """Base class for kernel_tuner backends.""" @abstractmethod def ready_argument_list(self, arguments): """This method must implement the allocation of the arguments on device memory.""" - pass + return arguments @abstractmethod def compile(self, kernel_instance): @@ -64,7 +64,7 @@ def refresh_memory(self, device_memory, host_arguments, should_sync): class GPUBackend(Backend): - """Base class for GPU backends""" + """Base class for GPU backends.""" @abstractmethod def __init__(self, device, iterations, compiler_options, observers): @@ -93,7 +93,7 @@ def refresh_memory(self, gpu_memory, host_arguments, should_sync): class CompilerBackend(Backend): - """Base class for compiler backends""" + """Base class for compiler backends.""" @abstractmethod def __init__(self, iterations, compiler_options, compiler): diff --git a/kernel_tuner/backends/cupy.py b/kernel_tuner/backends/cupy.py index 914f211a7..51613be7c 100644 --- a/kernel_tuner/backends/cupy.py +++ b/kernel_tuner/backends/cupy.py @@ -1,6 +1,5 @@ """This module contains all Cupy specific kernel_tuner functions.""" from __future__ import print_function -from warnings import warn import numpy as np diff --git a/kernel_tuner/backends/hip.py b/kernel_tuner/backends/hip.py index 46d2d50a5..14bc82457 100644 --- a/kernel_tuner/backends/hip.py +++ 
b/kernel_tuner/backends/hip.py @@ -40,7 +40,7 @@ def hip_check(call_result): if len(result) == 1: result = result[0] if isinstance(err, hip.hipError_t) and err != hip.hipError_t.hipSuccess: - raise RuntimeError(str(err)) + raise RuntimeError(str(err), hip.hipGetLastError()) return result diff --git a/kernel_tuner/backends/hypertuner.py b/kernel_tuner/backends/hypertuner.py new file mode 100644 index 000000000..50971f5aa --- /dev/null +++ b/kernel_tuner/backends/hypertuner.py @@ -0,0 +1,174 @@ +"""This module contains a 'device' for hyperparameter tuning using the autotuning methodology.""" + +import platform +from pathlib import Path + +from numpy import mean + +from kernel_tuner.backends.backend import Backend +from kernel_tuner.observers.observer import BenchmarkObserver + +try: + methodology_available = True + from autotuning_methodology.experiments import generate_experiment_file + from autotuning_methodology.report_experiments import get_strategy_scores +except ImportError: + methodology_available = False + + +class ScoreObserver(BenchmarkObserver): + """BenchmarkObserver subclass for registering the hyperparameter tuning score.""" + + def __init__(self, dev): + self.dev = dev + self.scores = [] + + def after_finish(self): + self.scores.append(self.dev.last_score) + + def get_results(self): + results = {'score': mean(self.scores), 'scores': self.scores.copy()} + self.scores = [] + return results + +class HypertunerFunctions(Backend): + """Class for executing hyperparameter tuning.""" + units = {} + + def __init__(self, iterations, compiler_options=None): + self.iterations = iterations + self.compiler_options = compiler_options + self.observers = [ScoreObserver(self)] + self.name = platform.processor() + self.max_threads = 1024 + self.last_score = None + + # set the defaults + self.gpus = ["A100", "A4000", "MI250X"] + folder = "../autotuning_methodology/benchmark_hub/kernels" + self.applications = [ + { + "name": "dedispersion_milo", + "folder": folder, + 
"input_file": "dedispersion_milo.json", + "objective_performance_keys": ["time"] + }, + { + "name": "hotspot_milo", + "folder": folder, + "input_file": "hotspot_milo.json", + "objective_performance_keys": ["GFLOP/s"] + }, + { + "name": "convolution_milo", + "folder": folder, + "input_file": "convolution_milo.json", + "objective_performance_keys": ["time"] + }, + { + "name": "gemm_milo", + "folder": folder, + "input_file": "gemm_milo.json", + "objective_performance_keys": ["time"] + } + ] + # any additional settings + self.override = { + "experimental_groups_defaults": { + "repeats": 25, + "samples": self.iterations, + "minimum_fraction_of_budget_valid": 0.1, + "minimum_number_of_valid_search_iterations": 5, + }, + "statistics_settings": { + "cutoff_percentile": 0.95, + "cutoff_percentile_start": 0.01, + "cutoff_type": "time", + "objective_time_keys": [ + "all" + ] + } + } + + # override the defaults with compiler options if provided + if self.compiler_options is not None: + if "gpus" in self.compiler_options: + self.gpus = self.compiler_options["gpus"] + if "applications" in self.compiler_options: + self.applications = self.compiler_options["applications"] + if "override" in self.compiler_options: + self.override = self.compiler_options["override"] + + # set the environment options + env = dict() + env["iterations"] = self.iterations + self.env = env + + # check for the methodology package + if methodology_available is not True: + raise ImportError("Unable to import the autotuning methodology, run `pip install autotuning_methodology`.") + + def ready_argument_list(self, arguments): + arglist = super().ready_argument_list(arguments) + if arglist is None: + arglist = [] + return arglist + + def compile(self, kernel_instance): + super().compile(kernel_instance) + path = Path(__file__).parent.parent.parent / "hyperparamtuning" + path.mkdir(exist_ok=True) + + # strategy settings + strategy: str = kernel_instance.arguments[0] + hyperparams = [{'name': k, 'value': v} for 
k, v in kernel_instance.params.items()] + hyperparams_string = "_".join(f"{k}={str(v)}" for k, v in kernel_instance.params.items()) + searchspace_strategies = [{ + "autotuner": "KernelTuner", + "name": f"{strategy.lower()}_{hyperparams_string}", + "display_name": strategy.replace('_', ' ').capitalize(), + "search_method": strategy.lower(), + 'search_method_hyperparameters': hyperparams + }] + + name = kernel_instance.name if len(kernel_instance.name) > 0 else kernel_instance.kernel_source.kernel_name + experiments_filepath = generate_experiment_file(name, path, searchspace_strategies, self.applications, self.gpus, + override=self.override, generate_unique_file=True, overwrite_existing_file=True) + return str(experiments_filepath) + + def start_event(self): + return super().start_event() + + def stop_event(self): + return super().stop_event() + + def kernel_finished(self): + super().kernel_finished() + return True + + def synchronize(self): + return super().synchronize() + + def run_kernel(self, func, gpu_args=None, threads=None, grid=None, stream=None): + # generate the experiments file + experiments_filepath = Path(func) + + # run the methodology to get a fitness score for this configuration + scores = get_strategy_scores(str(experiments_filepath)) + self.last_score = scores[list(scores.keys())[0]]['score'] + + # remove the experiments file + experiments_filepath.unlink() + + def memset(self, allocation, value, size): + return super().memset(allocation, value, size) + + def memcpy_dtoh(self, dest, src): + return super().memcpy_dtoh(dest, src) + + def memcpy_htod(self, dest, src): + return super().memcpy_htod(dest, src) + + def refresh_memory(self, device_memory, host_arguments, should_sync): + """This is a no-op for the hypertuner backend, as it does not manage memory directly.""" + pass diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py index 3c3ba792b..8df2dc4ac 100644 --- a/kernel_tuner/core.py +++ b/kernel_tuner/core.py @@ -1,4 +1,4 @@ -""" Module for 
grouping the core functionality needed by most runners """ +"""Module for grouping the core functionality needed by most runners.""" import logging import re @@ -14,15 +14,16 @@ import kernel_tuner.util as util from kernel_tuner.accuracy import Tunable -from kernel_tuner.backends.pycuda import PyCudaFunctions +from kernel_tuner.backends.compiler import CompilerFunctions from kernel_tuner.backends.cupy import CupyFunctions from kernel_tuner.backends.hip import HipFunctions +from kernel_tuner.backends.hypertuner import HypertunerFunctions from kernel_tuner.backends.nvcuda import CudaFunctions from kernel_tuner.backends.opencl import OpenCLFunctions -from kernel_tuner.backends.compiler import CompilerFunctions +from kernel_tuner.backends.pycuda import PyCudaFunctions from kernel_tuner.observers.nvml import NVMLObserver -from kernel_tuner.observers.tegra import TegraObserver from kernel_tuner.observers.observer import ContinuousObserver, OutputObserver, PrologueObserver +from kernel_tuner.observers.tegra import TegraObserver try: import torch @@ -50,15 +51,15 @@ class KernelInstance(_KernelInstance): - """Class that represents the specific parameterized instance of a kernel""" + """Class that represents the specific parameterized instance of a kernel.""" def delete_temp_files(self): - """Delete any generated temp files""" + """Delete any generated temp files.""" for v in self.temp_files.values(): util.delete_temp_file(v) def prepare_temp_files_for_error_msg(self): - """Prepare temp file with source code, and return list of temp file names""" + """Prepare temp file with source code, and return list of temp file names.""" temp_filename = util.get_temp_filename(suffix=self.kernel_source.get_suffix()) util.write_file(temp_filename, self.kernel_string) ret = [temp_filename] @@ -92,7 +93,7 @@ def __init__(self, kernel_name, kernel_sources, lang, defines=None): self.lang = lang.upper() def get_kernel_string(self, index=0, params=None): - """retrieve the kernel source with the 
given index and return as a string + """Retrieve the kernel source with the given index and return as a string. See util.get_kernel_string() for details. @@ -108,11 +109,16 @@ def get_kernel_string(self, index=0, params=None): """ logging.debug("get_kernel_string called") + if hasattr(self, 'lang') and self.lang.upper() == "HYPERTUNER": + return "" + kernel_source = self.kernel_sources[index] return util.get_kernel_string(kernel_source, params) - def prepare_list_of_files(self, kernel_name, params, grid, threads, block_size_names): - """prepare the kernel string along with any additional files + def prepare_list_of_files( + self, kernel_name, params, grid, threads, block_size_names + ): + """Prepare the kernel string along with any additional files. The first file in the list is allowed to include or read in the others The files beyond the first are considered additional files that may also contain tunable parameters @@ -145,6 +151,9 @@ def prepare_list_of_files(self, kernel_name, params, grid, threads, block_size_n """ temp_files = dict() + if self.lang.upper() == "HYPERTUNER": + return tuple(["", "", temp_files]) + for i, f in enumerate(self.kernel_sources): if i > 0 and not util.looks_like_a_filename(f): raise ValueError("When passing multiple kernel sources, the secondary entries must be filenames") @@ -191,7 +200,6 @@ def get_suffix(self, index=0): This uses the user-specified suffix if available, or one based on the lang/backend otherwise. """ - # TODO: Consider delegating this to the backend suffix = self.get_user_suffix(index) if suffix is not None: @@ -204,7 +212,7 @@ def get_suffix(self, index=0): return ".c" def check_argument_lists(self, kernel_name, arguments): - """Check if the kernel arguments have the correct types + """Check if the kernel arguments have the correct types. This is done by calling util.check_argument_list on each kernel string. 
""" @@ -216,7 +224,7 @@ def check_argument_lists(self, kernel_name, arguments): class DeviceInterface(object): - """Class that offers a High-Level Device Interface to the rest of the Kernel Tuner""" + """Class that offers a High-Level Device Interface to the rest of the Kernel Tuner.""" def __init__( self, @@ -229,7 +237,7 @@ def __init__( iterations=7, observers=None, ): - """Instantiate the DeviceInterface, based on language in kernel source + """Instantiate the DeviceInterface, based on language in kernel source. :param kernel_source: The kernel sources :type kernel_source: kernel_tuner.core.KernelSource @@ -259,6 +267,7 @@ def __init__( """ lang = kernel_source.lang + self.requires_warmup = True logging.debug("DeviceInterface instantiated, lang=%s", lang) @@ -305,8 +314,14 @@ def __init__( iterations=iterations, observers=observers, ) + elif lang.upper() == "HYPERTUNER": + dev = HypertunerFunctions( + iterations=iterations, + compiler_options=compiler_options + ) + self.requires_warmup = False else: - raise ValueError( + raise NotImplementedError( "Sorry, support for languages other than CUDA, OpenCL, HIP, C, and Fortran is not implemented yet" ) self.dev = dev @@ -347,8 +362,7 @@ def __init__( print("Using: " + self.dev.name) def benchmark_prologue(self, func, gpu_args, threads, grid, result): - """Benchmark prologue one kernel execution per PrologueObserver""" - + """Benchmark prologue one kernel execution per PrologueObserver.""" for obs in self.prologue_observers: self.dev.synchronize() obs.before_start() @@ -358,8 +372,7 @@ def benchmark_prologue(self, func, gpu_args, threads, grid, result): result.update(obs.get_results()) def benchmark_default(self, func, gpu_args, threads, grid, result): - """Benchmark one kernel execution for 'iterations' at a time""" - + """Benchmark one kernel execution for 'iterations' at a time.""" self.dev.synchronize() for _ in range(self.iterations): for obs in self.benchmark_observers: @@ -383,7 +396,7 @@ def 
benchmark_default(self, func, gpu_args, threads, grid, result): def benchmark_continuous(self, func, gpu_args, threads, grid, result, duration): - """Benchmark continuously for at least 'duration' seconds""" + """Benchmark continuously for at least 'duration' seconds.""" iterations = int(np.ceil(duration / (result["time"] / 1000))) self.dev.synchronize() for obs in self.continuous_observers: @@ -474,8 +487,10 @@ def benchmark(self, func, gpu_args, instance, verbose, objective, skip_nvml_sett raise e return result - def check_kernel_output(self, func, gpu_args, instance, answer, atol, verify, verbose): - """runs the kernel once and checks the result against answer""" + def check_kernel_output( + self, func, gpu_args, instance, answer, atol, verify, verbose + ): + """Runs the kernel once and checks the result against answer.""" logging.debug("check_kernel_output") # if not using custom verify function, check if the length is the same @@ -610,7 +625,7 @@ def compile_and_benchmark(self, kernel_source, gpu_args, params, kernel_options, return result def compile_kernel(self, instance, verbose): - """compile the kernel for this specific instance""" + """Compile the kernel for this specific instance.""" logging.debug("compile_kernel " + instance.name) # compile kernel_string into device func @@ -643,23 +658,23 @@ def compile_kernel(self, instance, verbose): @staticmethod def preprocess_gpu_arguments(old_arguments, params): - """Get a flat list of arguments based on the configuration given by `params`""" + """Get a flat list of arguments based on the configuration given by `params`.""" return _preprocess_gpu_arguments(old_arguments, params) def copy_shared_memory_args(self, smem_args): - """adds shared memory arguments to the most recently compiled module""" + """Adds shared memory arguments to the most recently compiled module.""" self.dev.copy_shared_memory_args(smem_args) def copy_constant_memory_args(self, cmem_args): - """adds constant memory arguments to the most 
recently compiled module""" + """Adds constant memory arguments to the most recently compiled module.""" self.dev.copy_constant_memory_args(cmem_args) def copy_texture_memory_args(self, texmem_args): - """adds texture memory arguments to the most recently compiled module""" + """Adds texture memory arguments to the most recently compiled module.""" self.dev.copy_texture_memory_args(texmem_args) def create_kernel_instance(self, kernel_source, kernel_options, params, verbose): - """create kernel instance from kernel source, parameters, problem size, grid divisors, and so on""" + """Create kernel instance from kernel source, parameters, problem size, grid divisors, and so on.""" grid_div = ( kernel_options.grid_div_x, kernel_options.grid_div_y, @@ -702,15 +717,15 @@ def create_kernel_instance(self, kernel_source, kernel_options, params, verbose) return KernelInstance(name, kernel_source, kernel_string, temp_files, threads, grid, params, arguments) def get_environment(self): - """Return dictionary with information about the environment""" + """Return dictionary with information about the environment.""" return self.dev.env def memcpy_dtoh(self, dest, src): - """perform a device to host memory copy""" + """Perform a device to host memory copy.""" self.dev.memcpy_dtoh(dest, src) def ready_argument_list(self, arguments): - """ready argument list to be passed to the kernel, allocates gpu mem if necessary""" + """Ready argument list to be passed to the kernel, allocates gpu mem if necessary.""" flat_args = [] # Flatten all arguments into a single list. 
Required to deal with `Tunable`s @@ -737,7 +752,7 @@ def ready_argument_list(self, arguments): return gpu_args def run_kernel(self, func, gpu_args, instance): - """Run a compiled kernel instance on a device""" + """Run a compiled kernel instance on a device.""" logging.debug("run_kernel %s", instance.name) logging.debug("thread block dims (%d, %d, %d)", *instance.threads) logging.debug("grid dims (%d, %d, %d)", *instance.grid) @@ -755,7 +770,7 @@ def run_kernel(self, func, gpu_args, instance): def _preprocess_gpu_arguments(old_arguments, params): - """Get a flat list of arguments based on the configuration given by `params`""" + """Get a flat list of arguments based on the configuration given by `params`.""" new_arguments = [] for argument in old_arguments: @@ -768,8 +783,7 @@ def _preprocess_gpu_arguments(old_arguments, params): def _default_verify_function(instance, answer, result_host, atol, verbose): - """default verify function based on np.allclose""" - + """Default verify function based on np.allclose.""" # first check if the length is the same if len(instance.arguments) != len(answer): raise TypeError("The length of argument list and provided results do not match.") @@ -886,7 +900,7 @@ def _flatten(a): # these functions facilitate compiling templated kernels with PyCuda def split_argument_list(argument_list): - """split all arguments in a list into types and names""" + """Split all arguments in a list into types and names.""" regex = r"(.*[\s*]+)(.+)?" 
type_list = [] name_list = [] @@ -900,10 +914,10 @@ def split_argument_list(argument_list): def apply_template_typenames(type_list, templated_typenames): - """replace the typename tokens in type_list with their templated typenames""" + """Replace the typename tokens in type_list with their templated typenames.""" def replace_typename_token(matchobj): - """function for a whitespace preserving token regex replace""" + """Function for a whitespace preserving token regex replace.""" # replace only the match, leaving the whitespace around it as is return matchobj.group(1) + templated_typenames[matchobj.group(2)] + matchobj.group(3) @@ -917,7 +931,7 @@ def replace_typename_token(matchobj): def get_templated_typenames(template_parameters, template_arguments): - """based on the template parameters and arguments, create dict with templated typenames""" + """Based on the template parameters and arguments, create dict with templated typenames.""" templated_typenames = {} for i, param in enumerate(template_parameters): if "typename " in param: @@ -927,7 +941,7 @@ def get_templated_typenames(template_parameters, template_arguments): def wrap_templated_kernel(kernel_string, kernel_name): - """rewrite kernel_string to insert wrapper function for templated kernel""" + """Rewrite kernel_string to insert wrapper function for templated kernel.""" # parse kernel_name to find template_arguments and real kernel name name = kernel_name.split("<")[0] template_arguments = re.search(r".*?<(.*)>", kernel_name, re.S).group(1).split(",") diff --git a/kernel_tuner/file_utils.py b/kernel_tuner/file_utils.py index e5d3dcb90..9231f0e2e 100644 --- a/kernel_tuner/file_utils.py +++ b/kernel_tuner/file_utils.py @@ -1,19 +1,43 @@ """This module contains utility functions for operations on files, mostly JSON cache files.""" import json -import os import subprocess from importlib.metadata import PackageNotFoundError, requires, version from pathlib import Path from sys import platform +import jsonschema 
import xmltodict from packaging.requirements import Requirement from kernel_tuner import util -schema_dir = os.path.dirname(os.path.realpath(__file__)) + "/schema" +schema_dir = Path(__file__).parent / "schema" +def input_file_schema(): + """Get the requested JSON input schema and the version number. + + :returns: the current version of the T1 schemas and the JSON string of the schema + :rtype: string, string + """ + current_version = "1.0.0" + input_file = schema_dir.joinpath(f"T1/{current_version}/input-schema.json") + with input_file.open() as fh: + json_string = json.load(fh) + return current_version, json_string + +def get_input_file(filepath: Path, validate=True) -> dict[str, any]: + """Load the T1 input file from the given path, validates it and returns contents if valid. + + :param filepath: Path to the input file to load. + :returns: the contents of the file if valid. + """ + with filepath.open() as fp: + input_file = json.load(fp) + if validate: + _, input_schema = input_file_schema() + jsonschema.validate(input_file, input_schema) + return input_file def output_file_schema(target): """Get the requested JSON schema and the version number. @@ -26,8 +50,8 @@ def output_file_schema(target): """ current_version = "1.0.0" - output_file = schema_dir + f"/T4/{current_version}/{target}-schema.json" - with open(output_file, "r") as fh: + output_file = schema_dir.joinpath(f"T4/{current_version}/{target}-schema.json") + with output_file.open() as fh: json_string = json.load(fh) return current_version, json_string @@ -63,13 +87,10 @@ def make_filenamepath(filenamepath: Path): filepath.mkdir() -def store_output_file(output_filename: str, results, tune_params, objective="time"): - """Store the obtained auto-tuning results in a JSON output file. +def get_t4_results(results, tune_params, objective="time"): + """Get the obtained auto-tuning results in a dictionary. - This function produces a JSON file that adheres to the T4 auto-tuning output JSON schema. 
- - :param output_filename: Name or 'path / name' of the to be created output file - :type output_filename: string + This function produces a dictionary that adheres to the T4 auto-tuning output JSON schema. :param results: Results list as return by tune_kernel :type results: list of dicts @@ -81,9 +102,6 @@ def store_output_file(output_filename: str, results, tune_params, objective="tim :type objective: string """ - output_filenamepath = Path(filename_ensure_json_extension(output_filename)) - make_filenamepath(output_filenamepath) - timing_keys = ["compile_time", "benchmark_time", "framework_time", "strategy_time", "verification_time"] not_measurement_keys = list(tune_params.keys()) + timing_keys + ["timestamp"] + ["times"] @@ -134,7 +152,30 @@ def store_output_file(output_filename: str, results, tune_params, objective="tim # write output_data to a JSON file version, _ = output_file_schema("results") - output_json = dict(results=output_data, schema_version=version) + output_json = dict(results=output_data, schema_version=version, metadata={'timeunit': 'milliseconds'}) + return output_json + +def store_output_file(output_filename: str, results, tune_params, objective="time"): + """Store the obtained auto-tuning results in a JSON output file. + + This function produces a JSON file that adheres to the T4 auto-tuning output JSON schema. + + :param output_filename: Name or 'path / name' of the to be created output file + :type output_filename: string + + :param results: Results list as return by tune_kernel + :type results: list of dicts + + :param tune_params: Tunable parameters as passed to tune_kernel + :type tune_params: dict + + :param objective: The objective used during auto-tuning, default is 'time'. 
+ :type objective: string + + """ + output_filenamepath = Path(filename_ensure_json_extension(output_filename)) + make_filenamepath(output_filenamepath) + output_json = get_t4_results(results, tune_params, objective) with open(output_filenamepath, "w+") as fh: json.dump(output_json, fh, cls=util.NpEncoder) @@ -175,17 +216,11 @@ def get_device_query(target): raise ValueError("get_device_query target not supported") -def store_metadata_file(metadata_filename: str): - """Store the metadata about the current hardware and software environment in a JSON output file. - - This function produces a JSON file that adheres to the T4 auto-tuning metadata JSON schema. - - :param metadata_filename: Name or 'path / name' of the to be created metadata file - :type metadata_filename: string +def get_t4_metadata(): + """Get the metadata about the current hardware and software environment. + This function produces a dictionary that adheres to the T4 auto-tuning metadata JSON schema. """ - metadata_filenamepath = Path(filename_ensure_json_extension(metadata_filename)) - make_filenamepath(metadata_filenamepath) metadata = {} supported_operating_systems = ["linux", "win32", "darwin"] @@ -250,5 +285,20 @@ def store_metadata_file(metadata_filename: str): # write metadata to JSON file version, _ = output_file_schema("metadata") metadata_json = dict(metadata=metadata, schema_version=version) + return metadata_json + +def store_metadata_file(metadata_filename: str): + """Store the metadata about the current hardware and software environment in a JSON output file. + + This function produces a JSON file that adheres to the T4 auto-tuning metadata JSON schema. 
+ + :param metadata_filename: Name or 'path / name' of the to be created metadata file + :type metadata_filename: string + + """ + metadata_filenamepath = Path(filename_ensure_json_extension(metadata_filename)) + make_filenamepath(metadata_filenamepath) + metadata_json = get_t4_metadata() with open(metadata_filenamepath, "w+") as fh: json.dump(metadata_json, fh, indent=" ") + diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py index f002882f3..97bc01567 100644 --- a/kernel_tuner/hyper.py +++ b/kernel_tuner/hyper.py @@ -1,15 +1,24 @@ -""" Module for functions related to hyperparameter optimization """ +"""Module for functions related to hyperparameter optimization.""" -import itertools -import warnings -import numpy as np + +from pathlib import Path +from random import randint +from argparse import ArgumentParser import kernel_tuner -from kernel_tuner.util import get_config_string -def tune_hyper_params(target_strategy, hyper_params, *args, **kwargs): - """ Tune hyperparameters for a given strategy and kernel +def get_random_unique_filename(prefix = '', suffix=''): + """Get a random, unique filename that does not yet exist.""" + def randpath(): + return Path(f"{prefix}{randint(1000, 9999)}{suffix}") + path = randpath() + while path.exists(): + path = randpath() + return path + +def tune_hyper_params(target_strategy: str, hyper_params: dict, restrictions: list, *args, **kwargs): + """Tune hyperparameters for a given strategy and kernel. This function is to be called just like tune_kernel, except that you specify a strategy and a dictionary with hyperparameters in front of the arguments you pass to tune_kernel. 
@@ -32,58 +41,136 @@ def tune_hyper_params(target_strategy, hyper_params, *args, **kwargs): :type kwargs: dict """ - if "cache" not in kwargs: - raise ValueError("Please specify a cachefile to store benchmarking data when tuning hyperparameters") + # v Have the methodology as a dependency + # - User inputs: + # - a set of bruteforced cachefiles / template experiments file + # - an optimization algorithm + # - the hyperparameter values to try + # - overarching optimization algorithm (meta-strategy) + # - At each round: + # - The meta-strategy selects a hyperparameter configuration to try + # - Kernel Tuner generates an experiments file with the hyperparameter configuration + # - Kernel Tuner executes this experiments file using the methodology + # - The methodology returns the fitness metric + # - The fitness metric is fed back into the meta-strategy + + iterations = 1 + if "iterations" in kwargs: + iterations = kwargs['iterations'] + del kwargs['iterations'] + + # pass a temporary cache file to avoid duplicate execution + if 'cache' not in kwargs: + cachefile = get_random_unique_filename('temp_', '.json') + cachefile = Path(f"hyperparamtuning_paper_bruteforce_{target_strategy}.json") + kwargs['cache'] = str(cachefile) def put_if_not_present(target_dict, key, value): target_dict[key] = value if key not in target_dict else target_dict[key] - put_if_not_present(kwargs, "verbose", False) - put_if_not_present(kwargs, "quiet", True) - put_if_not_present(kwargs, "simulation_mode", True) - kwargs['strategy'] = 'brute_force' - - #last position argument is tune_params - tune_params = args[-1] - - #find optimum - kwargs["strategy"] = "brute_force" - results, _ = kernel_tuner.tune_kernel(*args, **kwargs) - optimum = min(results, key=lambda p: p["time"])["time"] - - #could throw a warning for the kwargs that will be overwritten, strategy(_options) - kwargs["strategy"] = target_strategy - - parameter_space = itertools.product(*hyper_params.values()) - all_results = [] - - for 
params in parameter_space: - strategy_options = dict(zip(hyper_params.keys(), params)) - - kwargs["strategy_options"] = strategy_options - - fevals = [] - p_of_opt = [] - for _ in range(100): - #measure - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - results, _ = kernel_tuner.tune_kernel(*args, **kwargs) - - #get unique function evaluations - unique_fevals = {",".join([str(v) for k, v in record.items() if k in tune_params]) - for record in results} - - fevals.append(len(unique_fevals)) - p_of_opt.append(min(results, key=lambda p: p["time"])["time"] / optimum * 100) - - strategy_options["fevals"] = np.average(fevals) - strategy_options["fevals_std"] = np.std(fevals) - - strategy_options["p_of_opt"] = np.average(p_of_opt) - strategy_options["p_of_opt_std"] = np.std(p_of_opt) - - print(get_config_string(strategy_options)) - all_results.append(strategy_options) - - return all_results + # set default arguments if not provided + put_if_not_present(kwargs, "verbose", True) + put_if_not_present(kwargs, "quiet", False) + put_if_not_present(kwargs, "simulation_mode", False) + put_if_not_present(kwargs, "strategy", 'brute_force') + put_if_not_present(kwargs, 'verify', None) + arguments = [target_strategy] + + # IMPORTANT when running this script in parallel, always make sure the below name is unique among your runs! + # e.g. 
when parallelizing over the hypertuning of multiple strategies, use the strategy name + name = f"hyperparamtuning_{target_strategy.lower()}" + + # execute the hyperparameter tuning + result, env = kernel_tuner.tune_kernel(name, None, [], arguments, hyper_params, restrictions=restrictions, *args, lang='Hypertuner', + objective='score', objective_higher_is_better=True, iterations=iterations, **kwargs) + + # remove the temporary cachefile and return only unique results in order + # cachefile.unlink() + result_unique = dict() + for r in result: + config_id = ",".join(str(r[k]) for k in hyper_params.keys()) + if config_id not in result_unique: + result_unique[config_id] = r + return list(result_unique.values()), env + +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument("strategy_to_tune") + args = parser.parse_args() + strategy_to_tune = args.strategy_to_tune + + # select the hyperparameters for the selected optimization algorithm + restrictions = [] + if strategy_to_tune.lower() == "pso": + hyperparams = { + 'popsize': [10, 20, 30], + 'maxiter': [50, 100, 150], + # 'w': [0.25, 0.5, 0.75], # disabled due to low influence according to KW-test (H=0.0215) and mutual information + 'c1': [1.0, 2.0, 3.0], + 'c2': [0.5, 1.0, 1.5] + } + elif strategy_to_tune.lower() == "firefly_algorithm": + hyperparams = { + 'popsize': [10, 20, 30], + 'maxiter': [50, 100, 150], + 'B0': [0.5, 1.0, 1.5], + 'gamma': [0.1, 0.25, 0.5], + 'alpha': [0.1, 0.2, 0.3] + } + elif strategy_to_tune.lower() == "greedy_ils": + hyperparams = { + 'neighbor': ['Hamming', 'adjacent'], + 'restart': [True, False], + 'no_improvement': [10, 25, 50, 75], + 'random_walk': [0.1, 0.2, 0.3, 0.4, 0.5] + } + elif strategy_to_tune.lower() == "dual_annealing": + hyperparams = { + 'method': ['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 'BFGS', 'trust-constr'], + } + elif strategy_to_tune.lower() == "diff_evo": + hyperparams = { + 'method': ["best1bin", "best1exp",
"rand1exp", "randtobest1exp", "best2exp", "rand2exp", "randtobest1bin", "best2bin", "rand2bin", "rand1bin"], + 'popsize': [10, 20, 30], + 'maxiter': [50, 100, 150], + } + elif strategy_to_tune.lower() == "basinhopping": + hyperparams = { + 'method': ["Nelder-Mead", "Powell", "CG", "BFGS", "L-BFGS-B", "TNC", "COBYLA", "SLSQP"], + 'T': [0.1, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5], + } + elif strategy_to_tune.lower() == "genetic_algorithm": + hyperparams = { + 'method': ["single_point", "two_point", "uniform", "disruptive_uniform"], + 'popsize': [10, 20, 30], + 'maxiter': [50, 100, 150], + 'mutation_chance': [5, 10, 20] + } + elif strategy_to_tune.lower() == "greedy_mls": + hyperparams = { + 'neighbor': ["Hamming", "adjacent"], + 'restart': [True, False], + 'randomize': [True, False] + } + elif strategy_to_tune.lower() == "simulated_annealing": + hyperparams = { + 'T': [0.5, 1.0, 1.5], + 'T_min': [0.0001, 0.001, 0.01], + 'alpha': [0.9925, 0.995, 0.9975], + 'maxiter': [1, 2, 3] + } + elif strategy_to_tune.lower() == "bayes_opt": + hyperparams = { + # 'covariancekernel': ["constantrbf", "rbf", "matern32", "matern52"], + 'covariancelengthscale': [1.0, 1.5, 2.0], + 'method': ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast", "multi-ultrafast"], + 'samplingmethod': ["random", "LHS"], + 'popsize': [10, 20, 30] + } + else: + raise ValueError(f"Invalid argument {strategy_to_tune=}") + + # run the hyperparameter tuning + result, env = tune_hyper_params(strategy_to_tune.lower(), hyperparams, restrictions=restrictions) + print(result) + print(env['best_config']) diff --git a/kernel_tuner/integration.py b/kernel_tuner/integration.py index d3219ba87..938c8c7c9 100644 --- a/kernel_tuner/integration.py +++ b/kernel_tuner/integration.py @@ -4,7 +4,7 @@ from jsonschema import validate -from kernel_tuner import util +from kernel_tuner.util import get_instance_string, looks_like_a_filename, read_file #specifies for a number of pre-defined objectives whether #the 
objective should be minimized or maximized (boolean value denotes higher is better) @@ -205,8 +205,8 @@ def top_result(item): meta["version_number"] = "1.0" meta["kernel_name"] = kernel_name if kernel_string and not callable(kernel_string) and not isinstance(kernel_string, list): - if util.looks_like_a_filename(kernel_string): - meta["kernel_string"] = util.read_file(kernel_string) + if looks_like_a_filename(kernel_string): + meta["kernel_string"] = read_file(kernel_string) else: meta["kernel_string"] = kernel_string meta["objective"] = objective @@ -337,7 +337,7 @@ def _select_best_common_config(results, objective, objective_higher_is_better): for config in results: params = config["tunable_parameters"] - config_str = util.get_instance_string(params) + config_str = get_instance_string(params) #count occurances results_table[config_str] = results_table.get(config_str,0) + 1 #add to performance diff --git a/kernel_tuner/interface.py b/kernel_tuner/interface.py index bd421aeab..ae8927f3b 100644 --- a/kernel_tuner/interface.py +++ b/kernel_tuner/interface.py @@ -23,14 +23,20 @@ See the License for the specific language governing permissions and limitations under the License. 
""" + import logging +from argparse import ArgumentParser +from ast import literal_eval from datetime import datetime +from pathlib import Path from time import perf_counter import numpy +from constraint import Constraint import kernel_tuner.core as core import kernel_tuner.util as util +from kernel_tuner.file_utils import get_input_file, get_t4_metadata, get_t4_results from kernel_tuner.integration import get_objective_defaults from kernel_tuner.runners.sequential import SequentialRunner from kernel_tuner.runners.simulation import SimulationRunner @@ -74,7 +80,7 @@ "pso": pso, "simulated_annealing": simulated_annealing, "firefly_algorithm": firefly_algorithm, - "bayes_opt": bayes_opt, + "bayes_opt": bayes_opt } @@ -399,7 +405,7 @@ def __deepcopy__(self, _): All strategies support the following two options: 1. "max_fevals": the maximum number of unique valid function evaluations (i.e. compiling and - benchmarking a kernel configuration the strategy is allowed to perform as part of the optimization. + benchmarking a kernel configuration) the strategy is allowed to perform as part of the optimization. Note that some strategies implement a default max_fevals of 100. 2. 
"time_limit": the maximum amount of time in seconds the strategy is allowed to spent on trying to @@ -598,8 +604,13 @@ def tune_kernel( # ensure there is always at least three names util.append_default_block_size_names(block_size_names) - if iterations < 1: - raise ValueError("Iterations should be at least one!") + # if the restrictions are not constraints or a callable, the restrictions are strings, so parse them to functions (increases restrictions check performance significantly) + if ( + restrictions is not None + and not callable(restrictions) + and not any(isinstance(r, Constraint) for r in restrictions) + ): + restrictions = util.compile_restrictions(restrictions, tune_params) # sort all the options into separate dicts opts = locals() @@ -658,11 +669,16 @@ def tune_kernel( # we normalize it so that it always accepts atol. tuning_options.verify = util.normalize_verify_function(tuning_options.verify) + def preprocess_cache(filepath): + if isinstance(filepath, Path): + filepath = str(filepath.resolve()) + if filepath[-5:] != ".json": + filepath += ".json" + return filepath + # process cache if cache: - if cache[-5:] != ".json": - cache += ".json" - + cache = preprocess_cache(cache) util.process_cache(cache, kernel_options, tuning_options, runner) else: tuning_options.cache = {} @@ -675,8 +691,17 @@ def tune_kernel( if verbose: print(f"Searchspace has {searchspace.size} configurations after restrictions.") - # call the strategy to execute the tuning process + # register the times and raise an exception if the budget is exceeded + if "time_limit" in tuning_options: + tuning_options["startup_time"] = perf_counter() - start_overhead_time + if tuning_options["startup_time"] > tuning_options["time_limit"]: + raise RuntimeError( + f"The startup time of the tuning process ({tuning_options['startup_time']} seconds) has exceeded the time limit ({tuning_options['time_limit']} seconds). " + "Please increase the time limit or decrease the size of the search space." 
+ ) tuning_options["start_time"] = perf_counter() + + # call the strategy to execute the tuning process results = strategy.tune(searchspace, runner, tuning_options) env = runner.get_environment(tuning_options) @@ -684,7 +709,7 @@ def tune_kernel( if results: # checks if results is not empty best_config = util.get_best_config(results, objective, objective_higher_is_better) # add the best configuration to env - env['best_config'] = best_config + env["best_config"] = best_config if not device_options.quiet: units = getattr(runner, "units", None) print("best performing configuration:") @@ -835,3 +860,170 @@ def _check_user_input(kernel_name, kernelsource, arguments, block_size_names): # check for types and length of block_size_names util.check_block_size_names(block_size_names) + + +def tune_kernel_T1( + input_filepath: Path, + cache_filepath: Path = None, + objective="time", + objective_higher_is_better=False, + simulation_mode=False, + output_T4=True, + iterations=7, + device=None, + strategy: str=None, + strategy_options: dict={}, +) -> tuple: + """ + Call the tune function with a T1 input file. + + The device, strategy and strategy_options can be overridden by passing a strategy name and options, otherwise the input file specification is used. 
+ """ + inputs = get_input_file(input_filepath) + kernelspec: dict = inputs["KernelSpecification"] + kernel_name: str = kernelspec["KernelName"] + kernel_filepath = Path(kernelspec["KernelFile"]) + kernel_source = ( + kernel_filepath if kernel_filepath.exists() else Path(input_filepath).parent / kernel_filepath + ) + kernel_source = ( + kernel_source if kernel_source.exists() else Path(input_filepath).parent.parent / kernel_filepath + ) + assert kernel_source.exists(), f"KernelFile '{kernel_source}' does not exist at {kernel_source.resolve()}" + language: str = kernelspec["Language"] + problem_size = kernelspec["ProblemSize"] + if device is None: + device = kernelspec["Device"]["Name"] + if strategy is None: + strategy = inputs["Search"]["Name"] + if "Attributes" in inputs["Search"]: + for attribute in inputs["Search"]["Attributes"]: + strategy_options[attribute["Name"]] = attribute["Value"] + if "Budget" in inputs: + budget = inputs["Budget"][0] + if budget["Type"] == "ConfigurationCount": + strategy_options["max_fevals"] = budget["BudgetValue"] + elif budget["Type"] == "TuningDuration": + strategy_options["time_limit"] = budget["BudgetValue"] # both are in seconds + else: + raise NotImplementedError(f"Budget type in {budget} is not supported") + + # set the cache path + if cache_filepath is None and "SimulationInput" in kernelspec: + cache_filepath = Path(kernelspec["SimulationInput"]) + + # get the grid divisions + grid_divs = {} + for grid_div in ["GridDivX", "GridDivY", "GridDivZ"]: + grid_divs[grid_div] = None + if grid_div in kernelspec and len(kernelspec[grid_div]) > 0: + grid_divs[grid_div] = kernelspec[grid_div] + + # convert tuneable parameters + tune_params = dict() + for param in inputs["ConfigurationSpace"]["TuningParameters"]: + tune_param = None + if param["Type"] in ["int", "float"]: + vals = param["Values"] + if "list(" in vals or "range(" in vals or (vals[0] == "[" and vals[-1] == "]"): + tune_param = eval(vals) + else: + tune_param = 
literal_eval(vals) + if param["Type"] == "string": + tune_param = eval(param["Values"]) + if tune_param is not None: + tune_params[param["Name"]] = tune_param + else: + raise NotImplementedError(f"Conversion for this type of parameter has not yet been implemented: {param}") + + # convert restrictions + restrictions = list() + for res in inputs["ConfigurationSpace"]["Conditions"]: + restriction = None + if isinstance(res["Expression"], str): + restriction = res["Expression"] + if restriction is not None: + restrictions.append(restriction) + else: + raise NotImplementedError(f"Conversion for this type of restriction has not yet been implemented: {res}") + + # convert arguments (must be after resolving tune_params) + arguments = list() + cmem_arguments = {} + for arg in kernelspec["Arguments"]: + argument = None + if arg["MemoryType"] == "Vector": + if arg["Type"] != "float": + raise NotImplementedError( + f"Conversion for vector type '{arg['Type']}' has not yet been implemented: {arg}" + ) + size = arg["Size"] + if isinstance(size, str): + args = tune_params.copy() + args["ProblemSize"] = problem_size + size = int(eval(size, args)) + if not isinstance(size, int): + raise TypeError(f"Size should be an integer, but is {size} (type ({type(size)}, from {arg['Size']}))") + if arg["FillType"] == "Constant": + argument = numpy.full(size, arg["FillValue"]).astype(numpy.float32) + elif arg["FillType"] == "Random": + argument = numpy.random.randn(size).astype(numpy.float32) + else: + raise NotImplementedError(f"Conversion for fill type '{arg['FillType']}' has not yet been implemented") + elif arg["MemoryType"] == "Scalar": + if arg["Type"] == "float": + argument = numpy.float32(arg["FillValue"]) + else: + raise NotImplementedError() + if argument is not None: + arguments.append(argument) + if "MemType" in arg and arg["MemType"] == "Constant": + cmem_arguments[arg["Name"]] = argument + else: + raise NotImplementedError(f"Conversion for this type of argument has not yet been 
implemented: {arg}") + + # tune with the converted inputs + results, env = tune_kernel( + kernel_name, + kernel_source, + problem_size, + arguments, + tune_params, + device=device, + grid_div_x=grid_divs["GridDivX"], + grid_div_y=grid_divs["GridDivY"], + grid_div_z=grid_divs["GridDivZ"], + cmem_args=cmem_arguments, + restrictions=restrictions, + lang=language, + cache=cache_filepath, + simulation_mode=simulation_mode, + quiet=True, + verbose=False, + iterations=iterations, + strategy=strategy, + strategy_options=strategy_options, + objective=objective, + objective_higher_is_better=objective_higher_is_better, + ) + if output_T4: + return get_t4_metadata(), get_t4_results(results, tune_params, objective=objective) + return results, env + + +def entry_point(args=None): # pragma: no cover + """Command-line interface entry point.""" + cli = ArgumentParser() + cli.add_argument("input_file", type=str, help="The path to the input json file to execute (T1 standard)") + cli.add_argument( + "cache_file", type=str, help="The path to the cachefile to use (optional)", required=False, default=None + ) + args = cli.parse_args(args) + input_filepath_arg: str = args.input_file + if input_filepath_arg is None or input_filepath_arg == "": + raise ValueError("Invalid '--input_file' option. 
Run 'kernel_tuner -h' to read more.") + input_filepath = Path(input_filepath_arg) + cachefile_filepath_arg = args.cache_file + if cachefile_filepath_arg is not None: + cachefile_filepath_arg = Path(cachefile_filepath_arg) + tune_kernel_T1(input_filepath, cache_filepath=cachefile_filepath_arg) diff --git a/kernel_tuner/runners/sequential.py b/kernel_tuner/runners/sequential.py index aeebd5116..5e53093be 100644 --- a/kernel_tuner/runners/sequential.py +++ b/kernel_tuner/runners/sequential.py @@ -34,7 +34,7 @@ def __init__(self, kernel_source, kernel_options, device_options, iterations, ob self.units = self.dev.units self.quiet = device_options.quiet self.kernel_source = kernel_source - self.warmed_up = False + self.warmed_up = False if self.dev.requires_warmup else True self.simulation_mode = False self.start_time = perf_counter() self.last_strategy_start_time = self.start_time diff --git a/kernel_tuner/schema/T1/1.0.0/input-schema.json b/kernel_tuner/schema/T1/1.0.0/input-schema.json new file mode 100644 index 000000000..598a4b3d1 --- /dev/null +++ b/kernel_tuner/schema/T1/1.0.0/input-schema.json @@ -0,0 +1,413 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://github.com/odgaard/TuningSchema/blob/main/TuningSchema.json", + "title": "Tuning format", + "description": "A description of a tuning problem which can be loaded by an autotuning framework", + "type": "object", + "required": [ + "ConfigurationSpace", + "KernelSpecification" + ], + "properties": { + "ConfigurationSpace": { + "type": "object", + "required": [ + "TuningParameters" + ], + "properties": { + "TuningParameters": { + "type": "array", + "items": { + "type": "object", + "required": [ + "Name", + "Type", + "Values" + ], + "properties": { + "Name": { + "type": "string" + }, + "Type": { + "enum": [ + "int", + "uint", + "float", + "bool", + "string" + ] + }, + "Values": { + "type": "string" + } + } + } + }, + "Conditions": { + "type": "array", + "items": { + "type": "object", + 
"required": [ + "Parameters", + "Expression" + ], + "properties": { + "Parameters": { + "type": "array", + "items": { + "type": "string" + } + }, + "Expression": { + "type": "string" + } + } + } + } + } + }, + "Search": { + "type": "object", + "required": [ + "Name" + ], + "properties": { + "Name": { + "type": "string" + }, + "Attributes": { + "type": "array", + "items": { + "type": "object", + "required": [ + "Name", + "Value" + ], + "properties": { + "Name": { + "type": "string" + }, + "Value": { + "type": [ + "number", + "string", + "boolean", + "object", + "array" + ] + } + } + } + } + } + }, + "Budget": { + "type": "array", + "items": { + "type": "object", + "required": [ + "Type", + "BudgetValue" + ], + "properties": { + "Type": { + "enum": [ + "TuningDuration", + "ConfigurationCount", + "ConfigurationFraction" + ] + }, + "BudgetValue": { + "type": "number" + } + } + } + }, + "General": { + "type": "object", + "properties": { + "FormatVersion": { + "type": "integer" + }, + "LoggingLevel": { + "enum": [ + "Off", + "Error", + "Warning", + "Info", + "Debug" + ] + }, + "TimeUnit": { + "enum": [ + "Nanoseconds", + "Microseconds", + "Milliseconds", + "Seconds" + ] + }, + "OutputFile": { + "type": "string", + "examples": [ + "ReductionOutput", + "Results" + ] + }, + "OutputFormat": { + "enum": [ + "JSON", + "XML" + ] + } + } + }, + "KernelSpecification": { + "type": "object", + "required": [ + "Language", + "KernelName", + "KernelFile", + "GlobalSize", + "LocalSize" + ], + "properties": { + "Device": { + "type": "object", + "properties": { + "PlatformId": { + "type": "integer" + }, + "DeviceId": { + "type": "integer" + }, + "Name": { + "type": "string" + } + } + }, + "Language": { + "enum": [ + "OpenCL", + "CUDA", + "Vulkan", + "Hypertuner" + ] + }, + "CompilerOptions": { + "type": "array", + "items": { + "type": "string" + } + }, + "Profiling": { + "type": "boolean" + }, + "KernelName": { + "type": "string" + }, + "KernelFile": { + "type": "string" + }, + 
"GlobalSizeType": { + "enum": [ + "OpenCL", + "CUDA", + "Vulkan" + ] + }, + "SharedMemory": { + "type": "integer" + }, + "SimulationInput": { + "type": "string" + }, + "GlobalSize": { + "type": "object", + "required": [ + "X" + ], + "properties": { + "X": { + "type": "string" + }, + "Y": { + "type": "string" + }, + "Z": { + "type": "string" + } + } + }, + "LocalSize": { + "type": "object", + "required": [ + "X" + ], + "properties": { + "X": { + "type": "string" + }, + "Y": { + "type": "string" + }, + "Z": { + "type": "string" + } + } + }, + "Arguments": { + "type": "array", + "items": { + "type": "object", + "required": [ + "Type", + "MemoryType" + ], + "properties": { + "Name": { + "type": "string" + }, + "Type": { + "enum": [ + "bool", + "int8", + "uint8", + "int16", + "uint16", + "int32", + "uint32", + "int64", + "uint64", + "half", + "half2", + "half4", + "half8", + "half16", + "float", + "float2", + "float4", + "float8", + "float16", + "double", + "double2", + "double4", + "double8", + "double16", + "custom" + ] + }, + "Size": { + "type": [ + "integer", + "string" + ], + "examples": [ + 720, + 26000, + "ProblemSize[0]+max(filter_width)-1" + ] + }, + "TypeSize": { + "type": "integer", + "examples": [ + 4, + 16 + ] + }, + "FillType": { + "enum": [ + "Constant", + "Random", + "Generator", + "Script", + "BinaryRaw", + "BinaryHDF" + ] + }, + "FillValue": { + "type": "number", + "examples": [ + 40, + 1.0 + ] + }, + "DataSource": { + "type": "string" + }, + "RandomSeed": { + "type": "integer" + }, + "AccessType": { + "enum": [ + "ReadOnly", + "WriteOnly", + "ReadWrite" + ] + }, + "MemoryType": { + "enum": [ + "Scalar", + "Vector", + "Local", + "Symbol" + ] + } + } + } + }, + "ReferenceArguments": { + "type": "array", + "items": { + "type": "object", + "required": [ + "Name", + "TargetName", + "FillType" + ], + "properties": { + "Name": { + "type": "string" + }, + "TargetName": { + "type": "string" + }, + "FillType": { + "enum": [ + "Constant", + "Random", + 
"Generator", + "Script", + "BinaryRaw", + "BinaryHDF" + ] + }, + "FillValue": { + "type": "number", + "examples": [ + 40, + 1.0 + ] + }, + "DataSource": { + "type": "string" + }, + "RandomSeed": { + "type": "integer" + }, + "ValidationMethod": { + "enum": [ + "AbsoluteDifference", + "SideBySideComparison", + "SideBySideRelativeComparison" + ] + }, + "ValidationThreshold": { + "type": "number" + } + } + } + } + } + } + } +} \ No newline at end of file diff --git a/kernel_tuner/schema/T4/1.0.0/results-schema.json b/kernel_tuner/schema/T4/1.0.0/results-schema.json index 298f2662c..511042016 100644 --- a/kernel_tuner/schema/T4/1.0.0/results-schema.json +++ b/kernel_tuner/schema/T4/1.0.0/results-schema.json @@ -59,7 +59,11 @@ "type": "string" }, "value": { - "type": "number" + "type": [ + "number", + "string", + "array" + ] }, "unit": { "type": "string" diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py index 36e772639..e650f9628 100644 --- a/kernel_tuner/searchspace.py +++ b/kernel_tuner/searchspace.py @@ -1,8 +1,10 @@ import ast +import numbers import re from pathlib import Path from random import choice, shuffle -from typing import List +from typing import List, Union +from warnings import warn import numpy as np from constraint import ( @@ -12,13 +14,26 @@ MaxProdConstraint, MinConflictsSolver, OptimizedBacktrackingSolver, + # ParallelSolver, Problem, RecursiveBacktrackingSolver, Solver, ) +try: + import torch + from torch import Tensor + + torch_available = True +except ImportError: + torch_available = False + from kernel_tuner.util import check_restrictions as check_instance_restrictions -from kernel_tuner.util import compile_restrictions, default_block_size_names +from kernel_tuner.util import ( + compile_restrictions, + default_block_size_names, + get_interval, +) supported_neighbor_methods = ["strictly-adjacent", "adjacent", "Hamming"] @@ -34,6 +49,7 @@ def __init__( block_size_names=default_block_size_names, build_neighbors_index=False, 
neighbor_method=None, + from_cache: dict = None, framework="PythonConstraint", solver_method="PC_OptimizedBacktrackingSolver", path_to_ATF_cache: Path = None, @@ -45,18 +61,39 @@ def __init__( adjacent: picks closest parameter value in both directions for each parameter Hamming: any parameter config with 1 different parameter value is a neighbor Optionally sort the searchspace by the order in which the parameter values were specified. By default, sort goes from first to last parameter, to reverse this use sort_last_param_first. + Optionally an imported cache can be used instead with `from_cache`, in which case the `tune_params`, `restrictions` and `max_threads` arguments can be set to None, and construction is skipped. """ + # check the arguments + if from_cache is not None: + assert ( + tune_params is None and restrictions is None and max_threads is None + ), "When `from_cache` is used, the positional arguments must be set to None." + tune_params = from_cache["tune_params"] + if from_cache is None: + assert tune_params is not None and max_threads is not None, "Must specify positional arguments." + # set the object attributes using the arguments framework_l = framework.lower() restrictions = restrictions if restrictions is not None else [] self.tune_params = tune_params - self.restrictions = restrictions + self._tensorspace = None + self.tensor_dtype = torch.float32 if torch_available else None + self.tensor_device = torch.device("cpu") if torch_available else None + self.tensor_kwargs = dict(dtype=self.tensor_dtype, device=self.tensor_device) + self._tensorspace_bounds = None + self._tensorspace_bounds_indices = [] + self._tensorspace_categorical_dimensions = [] + self._tensorspace_param_config_structure = [] + self._map_tensor_to_param = {} + self._map_param_to_tensor = {} + self.restrictions = restrictions.copy() if hasattr(restrictions, "copy") else restrictions # the searchspace can add commonly used constraints (e.g. 
maxprod(blocks) <= maxthreads) - self._modified_restrictions = restrictions + self._modified_restrictions = restrictions.copy() if hasattr(restrictions, "copy") else restrictions self.param_names = list(self.tune_params.keys()) self.params_values = tuple(tuple(param_vals) for param_vals in self.tune_params.values()) self.params_values_indices = None self.build_neighbors_index = build_neighbors_index + self.solver_method = solver_method self.__neighbor_cache = dict() self.neighbor_method = neighbor_method if (neighbor_method is not None or build_neighbors_index) and neighbor_method not in supported_neighbor_methods: @@ -66,43 +103,66 @@ def __init__( restrictions = [restrictions] if not isinstance(restrictions, list) else restrictions if ( len(restrictions) > 0 - and any(isinstance(restriction, str) for restriction in restrictions) - and not (framework_l == "pysmt" or framework_l == "pyatf" or framework_l == "bruteforce") + and ( + any(isinstance(restriction, str) for restriction in restrictions) + or any( + isinstance(restriction[0], str) for restriction in restrictions if isinstance(restriction, tuple) + ) + ) + and not ( + framework_l == "pysmt" or framework_l == "bruteforce" or solver_method.lower() == "pc_parallelsolver" + ) ): self.restrictions = compile_restrictions( - restrictions, tune_params, monolithic=False, try_to_constraint=framework_l == "pythonconstraint" + restrictions, + tune_params, + monolithic=False, + format=framework_l if framework_l == "pyatf" else None, + try_to_constraint=framework_l == "pythonconstraint", ) - # get the framework given the framework argument - if framework_l == "pythonconstraint": - searchspace_builder = self.__build_searchspace - elif framework_l == "pysmt": - searchspace_builder = self.__build_searchspace_pysmt - elif framework_l == "pyatf": - searchspace_builder = self.__build_searchspace_pyATF - elif framework_l == "atf_cache": - searchspace_builder = self.__build_searchspace_ATF_cache - self.path_to_ATF_cache = 
path_to_ATF_cache - elif framework_l == "bruteforce": - searchspace_builder = self.__build_searchspace_bruteforce - else: - raise ValueError(f"Invalid framework parameter {framework}") - - # get the solver given the solver method argument - solver = "" - if solver_method.lower() == "pc_backtrackingsolver": - solver = BacktrackingSolver() - elif solver_method.lower() == "pc_optimizedbacktrackingsolver": - solver = OptimizedBacktrackingSolver(forwardcheck=False) - elif solver_method.lower() == "pc_recursivebacktrackingsolver": - solver = RecursiveBacktrackingSolver() - elif solver_method.lower() == "pc_minconflictssolver": - solver = MinConflictsSolver() + # if an imported cache, skip building and set the values directly + if from_cache is not None: + configs = dict(from_cache["cache"]).values() + self.list = list(tuple([v for p, v in c.items() if p in self.tune_params]) for c in configs) + self.size = len(self.list) + self.__dict = dict(zip(self.list, range(self.size))) else: - raise ValueError(f"Solver method {solver_method} not recognized.") + # get the framework given the framework argument + if framework_l == "pythonconstraint": + searchspace_builder = self.__build_searchspace + elif framework_l == "pysmt": + searchspace_builder = self.__build_searchspace_pysmt + elif framework_l == "pyatf": + searchspace_builder = self.__build_searchspace_pyATF + elif framework_l == "atf_cache": + searchspace_builder = self.__build_searchspace_ATF_cache + self.path_to_ATF_cache = path_to_ATF_cache + elif framework_l == "bruteforce": + searchspace_builder = self.__build_searchspace_bruteforce + else: + raise ValueError(f"Invalid framework parameter {framework}") + + # get the solver given the solver method argument + solver = "" + if solver_method.lower() == "pc_backtrackingsolver": + solver = BacktrackingSolver() + elif solver_method.lower() == "pc_optimizedbacktrackingsolver": + solver = OptimizedBacktrackingSolver(forwardcheck=False) + elif solver_method.lower() == 
"pc_parallelsolver": + raise NotImplementedError("ParallelSolver is not yet implemented") + # solver = ParallelSolver() + elif solver_method.lower() == "pc_recursivebacktrackingsolver": + solver = RecursiveBacktrackingSolver() + elif solver_method.lower() == "pc_minconflictssolver": + solver = MinConflictsSolver() + else: + raise ValueError(f"Solver method {solver_method} not recognized.") - # build the search space - self.list, self.__dict, self.size = searchspace_builder(block_size_names, max_threads, solver) + # build the search space + self.list, self.__dict, self.size = searchspace_builder(block_size_names, max_threads, solver) + + # finalize construction self.__numpy = None self.num_params = len(self.tune_params) self.indices = np.arange(self.size) @@ -145,7 +205,7 @@ def __init__( # num_solutions: int = csp.n_solutions() # number of solutions # solutions = [csp.values(sol=i) for i in range(num_solutions)] # list of solutions - def __build_searchspace_bruteforce(self, block_size_names: list, max_threads: int, solver = None): + def __build_searchspace_bruteforce(self, block_size_names: list, max_threads: int, solver=None): # bruteforce solving of the searchspace from itertools import product @@ -167,9 +227,16 @@ def __build_searchspace_bruteforce(self, block_size_names: list, max_threads: in restrictions = [restrictions] block_size_restriction_spaced = f"{' * '.join(used_block_size_names)} <= {max_threads}" block_size_restriction_unspaced = f"{'*'.join(used_block_size_names)} <= {max_threads}" - if block_size_restriction_spaced not in restrictions and block_size_restriction_unspaced not in restrictions: + if ( + block_size_restriction_spaced not in restrictions + and block_size_restriction_unspaced not in restrictions + ): restrictions.append(block_size_restriction_spaced) - if isinstance(self._modified_restrictions, list) and block_size_restriction_spaced not in self._modified_restrictions: + if ( + isinstance(self._modified_restrictions, list) + and 
block_size_restriction_spaced not in self._modified_restrictions + ): + print(f"added default block size restriction '{block_size_restriction_spaced}'") self._modified_restrictions.append(block_size_restriction_spaced) if isinstance(self.restrictions, list): self.restrictions.append(block_size_restriction_spaced) @@ -252,25 +319,76 @@ def all_smt(formula, keys) -> list: def __build_searchspace_pyATF(self, block_size_names: list, max_threads: int, solver: Solver): """Builds the searchspace using pyATF.""" - from pyatf import TP, Set, Tuner + from pyatf import TP, Interval, Set, Tuner from pyatf.cost_functions.generic import CostFunction from pyatf.search_techniques import Exhaustive - costfunc = CostFunction("echo 'hello'") + # Define a bogus cost function + costfunc = CostFunction(":") # bash no-op + + # add the Kernel Tuner default blocksize threads restrictions + assert isinstance(self.restrictions, list) + valid_block_size_names = list( + block_size_name for block_size_name in block_size_names if block_size_name in self.param_names + ) + if len(valid_block_size_names) > 0: + # adding the default blocksize restriction requires recompilation because pyATF requires combined restrictions for the same parameter + max_block_size_product = f"{' * '.join(valid_block_size_names)} <= {max_threads}" + restrictions = self._modified_restrictions.copy() + [max_block_size_product] + self.restrictions = compile_restrictions( + restrictions, self.tune_params, format="pyatf", try_to_constraint=False + ) + # build a dictionary of the restrictions, combined based on last parameter + res_dict = dict() + registered_params = list() + registered_restrictions = list() + for param in self.tune_params.keys(): + registered_params.append(param) + for index, (res, params, source) in enumerate(self.restrictions): + if index in registered_restrictions: + continue + if all(p in registered_params for p in params): + if param in res_dict: + raise KeyError( + f"`{param}` is already in res_dict 
with `{res_dict[param][1]}`, can't add `{source}`" + ) + res_dict[param] = (res, source) + print(source, res, param, params) + registered_restrictions.append(index) + + # define the Tunable Parameters def get_params(): - params = List() - for key, values in self.tune_params.items(): - TP(key, Set(values)) + params = list() + for index, (key, values) in enumerate(self.tune_params.items()): + vi = get_interval(values) + vals = ( + Interval(vi[0], vi[1], vi[2]) if vi is not None and vi[2] != 0 else Set(*np.array(values).flatten()) + ) + constraint = res_dict.get(key, None) + constraint_source = None + if constraint is not None: + constraint, constraint_source = constraint + # in case of a leftover monolithic restriction, append at the last parameter + if index == len(self.tune_params) - 1 and len(res_dict) == 0 and len(self.restrictions) == 1: + res, params, source = self.restrictions[0] + assert callable(res) + constraint = res + params.append(TP(key, vals, constraint, constraint_source)) return params - tuning_result = ( - Tuner() - .tuning_parameters(*get_params()) - .search_technique(Exhaustive()) - .tune(costfunc) + # tune + _, _, tuning_data = ( + Tuner().verbosity(0).tuning_parameters(*get_params()).search_technique(Exhaustive()).tune(costfunc) ) - return tuning_result + + # transform the result into a list of parameter configurations for validation + tune_params = self.tune_params + parameter_tuple_list = list() + for entry in tuning_data.history._entries: + parameter_tuple_list.append(tuple(entry.configuration[p] for p in tune_params.keys())) + pl = self.__parameter_space_list_to_lookup_and_return_type(parameter_tuple_list) + return pl def __build_searchspace_ATF_cache(self, block_size_names: list, max_threads: int, solver: Solver): """Imports the valid configurations from an ATF CSV file, returns the searchspace, a dict of the searchspace for fast lookups and the size.""" @@ -323,10 +441,13 @@ def __build_searchspace(self, block_size_names: list, 
max_threads: int, solver: if len(valid_block_size_names) > 0: parameter_space.addConstraint(MaxProdConstraint(max_threads), valid_block_size_names) max_block_size_product = f"{' * '.join(valid_block_size_names)} <= {max_threads}" - if isinstance(self._modified_restrictions, list) and max_block_size_product not in self._modified_restrictions: + if ( + isinstance(self._modified_restrictions, list) + and max_block_size_product not in self._modified_restrictions + ): self._modified_restrictions.append(max_block_size_product) if isinstance(self.restrictions, list): - self.restrictions.append((MaxProdConstraint(max_threads), valid_block_size_names)) + self.restrictions.append((MaxProdConstraint(max_threads), valid_block_size_names, None)) # construct the parameter space with the constraints applied return parameter_space.getSolutionsAsListDict(order=self.param_names) @@ -339,7 +460,7 @@ def __add_restrictions(self, parameter_space: Problem) -> Problem: # convert to a Constraint type if necessary if isinstance(restriction, tuple): - restriction, required_params = restriction + restriction, required_params, _ = restriction if callable(restriction) and not isinstance(restriction, Constraint): restriction = FunctionConstraint(restriction) @@ -348,12 +469,11 @@ def __add_restrictions(self, parameter_space: Problem) -> Problem: parameter_space.addConstraint(restriction, required_params) elif isinstance(restriction, Constraint): all_params_required = all(param_name in required_params for param_name in self.param_names) - parameter_space.addConstraint( - restriction, - None if all_params_required else required_params - ) + parameter_space.addConstraint(restriction, None if all_params_required else required_params) + elif isinstance(restriction, str) and self.solver_method.lower() == "pc_parallelsolver": + parameter_space.addConstraint(restriction) else: - raise ValueError(f"Unrecognized restriction {restriction}") + raise ValueError(f"Unrecognized restriction type 
{type(restriction)} ({restriction})") # if the restrictions are the old monolithic function, apply them directly (only for backwards compatibility, likely slower than well-specified constraints!) elif callable(self.restrictions): @@ -537,11 +657,116 @@ def get_param_configs_at_indices(self, indices: List[int]) -> List[tuple]: # map(get) is ~40% faster than numpy[indices] (average based on six searchspaces with 10000, 100000 and 1000000 configs and 10 or 100 random indices) return list(map(self.list.__getitem__, indices)) - def get_param_config_index(self, param_config: tuple): + def get_param_config_index(self, param_config: Union[tuple, any]): """Lookup the index for a parameter configuration, returns None if not found.""" + if torch_available and isinstance(param_config, Tensor): + param_config = self.tensor_to_param_config(param_config) # constant time O(1) access - much faster than any other method, but needs a shadow dict of the search space return self.__dict.get(param_config, None) + def initialize_tensorspace(self, dtype=None, device=None): + """Encode the searchspace in a Tensor. Save the mapping. 
Call this function directly to control the precision or device used.""" + assert self._tensorspace is None, "Tensorspace is already initialized" + skipped_count = 0 + bounds = [] + if dtype is not None: + self.tensor_dtype = dtype + if device is not None: + self.tensor_device = device + self.tensor_kwargs = dict(dtype=self.tensor_dtype, device=self.tensor_device) + + # generate the mappings to and from tensor values + for index, param_values in enumerate(self.params_values): + # filter out parameters that do not matter, more efficient and avoids bounds problem + if len(param_values) < 2 or all(p == param_values[0] for p in param_values): + # keep track of skipped parameters, add them back in conversion functions + self._tensorspace_param_config_structure.append(param_values[0]) + skipped_count += 1 + continue + else: + self._tensorspace_param_config_structure.append(None) + + # convert numericals to float, or encode categorical + if all(isinstance(v, numbers.Real) for v in param_values): + tensor_values = torch.tensor(param_values, dtype=self.tensor_dtype) + else: + self._tensorspace_categorical_dimensions.append(index - skipped_count) + # tensor_values = np.arange(len(param_values)) + tensor_values = torch.arange(len(param_values), dtype=self.tensor_dtype) + + # write the mappings to the object + self._map_param_to_tensor[index] = dict(zip(param_values, tensor_values.tolist())) + self._map_tensor_to_param[index] = dict(zip(tensor_values.tolist(), param_values)) + bounds.append((tensor_values.min(), tensor_values.max())) + if tensor_values.min() < tensor_values.max(): + self._tensorspace_bounds_indices.append(index - skipped_count) + + # do some checks + assert len(self.params_values) == len(self._tensorspace_param_config_structure) + assert len(self._map_param_to_tensor) == len(self._map_tensor_to_param) == len(bounds) + assert len(self._tensorspace_bounds_indices) <= len(bounds) + + # apply the mappings on the full searchspace + # numpy_repr = 
self.get_list_numpy() + # numpy_repr = np.apply_along_axis(self.param_config_to_tensor, 1, numpy_repr) + # self._tensorspace = torch.from_numpy(numpy_repr.astype(self.tensor_dtype)).to(self.tensor_device) + self._tensorspace = torch.stack(tuple(map(self.param_config_to_tensor, self.list))) + + # set the bounds in the correct format (one array for the min, one for the max) + bounds = torch.tensor(bounds, **self.tensor_kwargs) + self._tensorspace_bounds = torch.cat([bounds[:, 0], bounds[:, 1]]).reshape((2, bounds.shape[0])) + + def get_tensorspace(self): + """Get the searchspace encoded in a Tensor. To use a non-default dtype or device, call `initialize_tensorspace` first.""" + if self._tensorspace is None: + self.initialize_tensorspace() + return self._tensorspace + + def get_tensorspace_categorical_dimensions(self): + """Get the a list of the categorical dimensions in the tensorspace.""" + return self._tensorspace_categorical_dimensions + + def param_config_to_tensor(self, param_config: tuple): + """Convert from a parameter configuration to a Tensor.""" + if len(self._map_param_to_tensor) == 0: + self.initialize_tensorspace() + array = [] + for i, param in enumerate(param_config): + if self._tensorspace_param_config_structure[i] is not None: + continue # skip over parameters not in the tensorspace + mapping = self._map_param_to_tensor[i] + conversions = [None, str, float, int, bool] + for c in conversions: + try: + c_param = param if c is None else c(param) + array.append(mapping[c_param]) + break + except (KeyError, ValueError) as e: + if c == conversions[-1]: + raise KeyError(f"No variant of {param} could be found in {mapping}") from e + return torch.tensor(array, **self.tensor_kwargs) + + def tensor_to_param_config(self, tensor): + """Convert from a Tensor to a parameter configuration.""" + assert tensor.dim() == 1, f"Parameter configuration tensor must be 1-dimensional, is {tensor.dim()} ({tensor})" + if len(self._map_tensor_to_param) == 0: + 
self.initialize_tensorspace() + config = self._tensorspace_param_config_structure.copy() + skip_counter = 0 + for i, param in enumerate(config): + if param is not None: + skip_counter += 1 + else: + value = tensor[i - skip_counter].item() + config[i] = self._map_tensor_to_param[i][value] + return tuple(config) + + def get_tensorspace_bounds(self): + """Get the bounds to the tensorspace parameters, returned as a 2 x d dimensional tensor, and the indices of the parameters.""" + if self._tensorspace is None: + self.initialize_tensorspace() + return self._tensorspace_bounds, self._tensorspace_bounds_indices + def __prepare_neighbors_index(self): """Prepare by calculating the indices for the individual parameters.""" self.params_values_indices = np.array(list(self.get_param_indices(param_config) for param_config in self.list)) @@ -639,6 +864,11 @@ def get_random_sample_indices(self, num_samples: int) -> np.ndarray: def get_random_sample(self, num_samples: int) -> List[tuple]: """Get the parameter configurations for a random, non-conflicting sample (caution: not unique in consecutive calls).""" + if self.size < num_samples: + warn( + f"Too many samples requested ({num_samples}), reducing the number of samples to the searchspace size ({self.size})" + ) + num_samples = self.size return self.get_param_configs_at_indices(self.get_random_sample_indices(num_samples)) def get_neighbors_indices_no_cache(self, param_config: tuple, neighbor_method=None) -> List[int]: @@ -752,3 +982,39 @@ def order_param_configs( f"The number of ordered parameter configurations ({len(ordered_param_configs)}) differs from the original number of parameter configurations ({len(param_configs)})" ) return ordered_param_configs + + def to_ax_searchspace(self): + """Convert this searchspace to an Ax SearchSpace.""" + from ax import ChoiceParameter, FixedParameter, ParameterType, SearchSpace + + # create searchspace + ax_searchspace = SearchSpace([]) + + # add the parameters + for param_name, param_values 
in self.tune_params.items(): + if len(param_values) == 0: + continue + + # convert the types + assert all( + isinstance(param_values[0], type(v)) for v in param_values + ), f"Parameter values of mixed types are not supported: {param_values}" + param_type_mapping = { + str: ParameterType.STRING, + int: ParameterType.INT, + float: ParameterType.FLOAT, + bool: ParameterType.BOOL, + } + param_type = param_type_mapping[type(param_values[0])] + + # add the parameter + if len(param_values) == 1: + ax_searchspace.add_parameter(FixedParameter(param_name, param_type, param_values[0])) + else: + ax_searchspace.add_parameter(ChoiceParameter(param_name, param_type, param_values)) + + # add the constraints + raise NotImplementedError( + "Conversion to Ax SearchSpace has not been fully implemented as Ax Searchspaces can't capture full complexity." + ) + # return ax_searchspace diff --git a/kernel_tuner/strategies/basinhopping.py b/kernel_tuner/strategies/basinhopping.py index 20e800f6e..eed906676 100644 --- a/kernel_tuner/strategies/basinhopping.py +++ b/kernel_tuner/strategies/basinhopping.py @@ -1,7 +1,7 @@ """The strategy that uses the basinhopping global optimization method.""" import scipy.optimize -from kernel_tuner import util +from kernel_tuner.util import StopCriterionReached from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc, setup_method_arguments, setup_method_options @@ -31,7 +31,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): try: opt_result = scipy.optimize.basinhopping(cost_func, x0, T=T, stepsize=eps, minimizer_kwargs=minimizer_kwargs, disp=tuning_options.verbose) - except util.StopCriterionReached as e: + except StopCriterionReached as e: if tuning_options.verbose: print(e) diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py index 024a3f8c0..66e360009 100644 --- a/kernel_tuner/strategies/bayes_opt.py +++ 
b/kernel_tuner/strategies/bayes_opt.py @@ -1,4 +1,5 @@ """Bayesian Optimization implementation from the thesis by Willemsen.""" + import itertools import time import warnings @@ -13,6 +14,7 @@ # BO imports from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies.common import CostFunc +from kernel_tuner.util import StopCriterionReached try: from sklearn.gaussian_process import GaussianProcessRegressor @@ -24,7 +26,7 @@ from kernel_tuner import util -supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast"] +supported_methods = ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast", "multi-ultrafast"] def generate_normalized_param_dicts(tune_params: dict, eps: float) -> Tuple[dict, dict]: @@ -137,16 +139,17 @@ def tune(searchspace: Searchspace, runner, tuning_options): bo = BayesianOptimization( parameter_space, removed_tune_params, tuning_options, normalize_dict, denormalize_dict, cost_func ) - except util.StopCriterionReached as e: - print( + except StopCriterionReached: + warnings.warn( "Stop criterion reached during initialization, was popsize (default 20) greater than max_fevals or the alotted time?" 
) - raise e + return cost_func.results + # raise e try: if max_fevals - bo.fevals <= 0: raise ValueError("No function evaluations left for optimization after sampling") bo.optimize(max_fevals) - except util.StopCriterionReached as e: + except StopCriterionReached as e: if tuning_options.verbose: print(e) @@ -162,7 +165,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): covariancelengthscale=("The covariance length scale", 1.5), method=( "The Bayesian Optimization method to use, choose any from " + ", ".join(supported_methods), - "multi-advanced", + "multi-ultrafast", ), samplingmethod=( "Method used for initial sampling the parameter space, either random or Latin Hypercube Sampling (LHS)", @@ -199,7 +202,7 @@ def get_hyperparam(name: str, default, supported_values=list()): # get hyperparameters cov_kernel_name = get_hyperparam("covariancekernel", "matern32", self.supported_cov_kernels) cov_kernel_lengthscale = get_hyperparam("covariancelengthscale", 1.5) - acquisition_function = get_hyperparam("method", "multi-advanced", self.supported_methods) + acquisition_function = get_hyperparam("method", "multi-ultrafast", self.supported_methods) acq = acquisition_function acq_params = get_hyperparam("methodparams", {}) multi_af_names = get_hyperparam("multi_af_names", ["ei", "poi", "lcb"]) @@ -342,6 +345,8 @@ def set_acquisition_function(self, acquisition_function: str): self.optimize = self.__optimize_multi_advanced elif acquisition_function == "multi-fast": self.optimize = self.__optimize_multi_fast + elif acquisition_function == "multi-ultrafast": + self.optimize = self.__optimize_multi_ultrafast else: raise ValueError( "Acquisition function must be one of {}, is {}".format(self.supported_methods, acquisition_function) @@ -843,6 +848,44 @@ def __optimize_multi_fast(self, max_fevals): self.update_after_evaluation(observation, candidate_index, candidate_params) self.fit_observations_to_model() + def __optimize_multi_ultrafast(self, max_fevals, 
predict_eval_ratio=5): + """Optimize with a portfolio of multiple acquisition functions. Predictions are only taken once, or fewer if predictions take too long. + + The `predict_eval_ratio` denotes the ratio between the duration of the predictions and the duration of evaluations, as updating the prediction every evaluation is not efficient when evaluation is quick. + Predictions are only updated if the previous evaluation took more than `predict_eval_ratio` * the last prediction duration, or the last prediction is more than `predict_eval_ratio` evaluations ago. + """ + last_prediction_counter = 0 + last_prediction_time = 0 + last_eval_time = 0 + while self.fevals < max_fevals: + aqfs = self.multi_afs + # if we take the prediction only once, we want to go from most exploiting to most exploring, because the more exploiting an AF is, the more it relies on non-stale information from the model + fit_observations = last_prediction_time * predict_eval_ratio <= last_eval_time or last_prediction_counter >= predict_eval_ratio + if fit_observations: + last_prediction_counter = 0 + pred_start = time.perf_counter() + if last_eval_time > 0.0: + self.fit_observations_to_model() + predictions, _, std = self.predict_list(self.unvisited_cache) + last_prediction_time = time.perf_counter() - pred_start + else: + last_prediction_counter += 1 + eval_start = time.perf_counter() + hyperparam = self.contextual_variance(std) + if self.__visited_num >= self.searchspace_size: + raise ValueError(self.error_message_searchspace_fully_observed) + for af in aqfs: + if self.__visited_num >= self.searchspace_size or self.fevals >= max_fevals: + break + list_of_acquisition_values = af(predictions, hyperparam) + best_af = self.argopt(list_of_acquisition_values) + del predictions[best_af] # to avoid going out of bounds + candidate_params = self.unvisited_cache[best_af] + candidate_index = self.find_param_config_index(candidate_params) + observation = self.evaluate_objective_function(candidate_params) + 
self.update_after_evaluation(observation, candidate_index, candidate_params) + last_eval_time = time.perf_counter() - eval_start + def af_random(self, predictions=None, hyperparam=None) -> list: """Acquisition function returning a randomly shuffled list for comparison.""" list_random = range(len(self.unvisited_cache)) diff --git a/kernel_tuner/strategies/brute_force.py b/kernel_tuner/strategies/brute_force.py index a0e3f8ebe..66896942f 100644 --- a/kernel_tuner/strategies/brute_force.py +++ b/kernel_tuner/strategies/brute_force.py @@ -1,4 +1,4 @@ -""" The default strategy that iterates through the whole parameter space """ +"""The default strategy that iterates through the whole parameter space.""" from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py index d01eae937..a24f6b599 100644 --- a/kernel_tuner/strategies/common.py +++ b/kernel_tuner/strategies/common.py @@ -1,8 +1,12 @@ +"""Module for functionality that is commonly used throughout the strategies.""" + import logging +import numbers import sys from time import perf_counter import numpy as np +from scipy.spatial import distance from kernel_tuner import util from kernel_tuner.searchspace import Searchspace @@ -30,7 +34,9 @@ def get_strategy_docstring(name, strategy_options): """Generate docstring for a 'tune' method of a strategy.""" - return _docstring_template.replace("$NAME$", name).replace("$STRAT_OPT$", make_strategy_options_doc(strategy_options)) + return _docstring_template.replace("$NAME$", name).replace( + "$STRAT_OPT$", make_strategy_options_doc(strategy_options) + ) def make_strategy_options_doc(strategy_options): @@ -53,21 +59,72 @@ def get_options(strategy_options, options): class CostFunc: - def __init__(self, searchspace: Searchspace, tuning_options, runner, *, scaling=False, snap=True): + """Class encapsulating the CostFunc method.""" + + def __init__( + self, + 
searchspace: Searchspace, + tuning_options, + runner, + *, + scaling=False, + snap=True, + encode_non_numeric=False, + return_invalid=False, + return_raw=None, + ): + """An abstract method to handle evaluation of configurations. + + Args: + searchspace: the Searchspace to evaluate on. + tuning_options: various tuning options. + runner: the runner to use. + scaling: whether to internally scale parameter values. Defaults to False. + snap: whether to snap given configurations to their closests equivalent in the space. Defaults to True. + encode_non_numeric: whether to externally encode non-numeric parameter values. Defaults to False. + return_invalid: whether to return the util.ErrorConfig of an invalid configuration. Defaults to False. + return_raw: returns (result, results[raw]). Key inferred from objective if set to True. Defaults to None. + """ self.runner = runner - self.tuning_options = tuning_options self.snap = snap self.scaling = scaling + self.encode_non_numeric = encode_non_numeric + self.return_invalid = return_invalid + self.return_raw = return_raw + if return_raw is True: + self.return_raw = f"{tuning_options['objective']}s" self.searchspace = searchspace + self.tuning_options = tuning_options + if isinstance(self.tuning_options, dict): + self.tuning_options["max_fevals"] = min( + tuning_options["max_fevals"] if "max_fevals" in tuning_options else np.inf, searchspace.size + ) self.results = [] + # if enabled, encode non-numeric parameter values as a numeric value + if self.encode_non_numeric: + self._map_param_to_encoded = {} + self._map_encoded_to_param = {} + self.encoded_params_values = [] + for i, param_values in enumerate(self.searchspace.params_values): + encoded_values = param_values + if not all(isinstance(v, numbers.Real) for v in param_values): + encoded_values = np.arange( + len(param_values) + ) # NOTE when changing this, adjust the rounding in encoded_to_params + self._map_param_to_encoded[i] = dict(zip(param_values, encoded_values)) + 
self._map_encoded_to_param[i] = dict(zip(encoded_values, param_values)) + self.encoded_params_values.append(encoded_values) + def __call__(self, x, check_restrictions=True): """Cost function used by almost all strategies.""" self.runner.last_strategy_time = 1000 * (perf_counter() - self.runner.last_strategy_start_time) + if self.encode_non_numeric: + x = self.encoded_to_params(x) # error value to return for numeric optimizers that need a numerical value - logging.debug('_cost_func called') - logging.debug('x: ' + str(x)) + logging.debug("_cost_func called") + logging.debug("x: %s", str(x)) # check if max_fevals is reached or time limit is exceeded util.check_stop_criterion(self.tuning_options) @@ -80,7 +137,7 @@ def __call__(self, x, check_restrictions=True): params = snap_to_nearest_config(x, self.searchspace.tune_params) else: params = x - logging.debug('params ' + str(params)) + logging.debug("params %s", str(params)) legal = True result = {} @@ -88,8 +145,17 @@ def __call__(self, x, check_restrictions=True): # else check if this is a legal (non-restricted) configuration if check_restrictions and self.searchspace.restrictions: + legal = self.searchspace.is_param_config_valid(tuple(params)) params_dict = dict(zip(self.searchspace.tune_params.keys(), params)) - legal = util.check_restrictions(self.searchspace.restrictions, params_dict, self.tuning_options.verbose) + + if "constraint_aware" in self.tuning_options.strategy_options and self.tuning_options.strategy_options["constraint_aware"]: + # attempt to repair + new_params = unscale_and_snap_to_nearest_valid(x, params, self.searchspace, self.tuning_options.eps) + if new_params: + params = new_params + legal = True + x_int = ",".join([str(i) for i in params]) + if not legal: result = params_dict result[self.tuning_options.objective] = util.InvalidConfig() @@ -109,8 +175,18 @@ def __call__(self, x, check_restrictions=True): self.runner.last_strategy_start_time = perf_counter() # get numerical return value, taking 
optimization direction into account - return_value = result[self.tuning_options.objective] or sys.float_info.max - return_value = return_value if not self.tuning_options.objective_higher_is_better else -return_value + if self.return_invalid: + return_value = result[self.tuning_options.objective] + else: + return_value = result[self.tuning_options.objective] or sys.float_info.max + return_value = -return_value if self.tuning_options.objective_higher_is_better else return_value + + # include raw data in return if requested + if self.return_raw is not None: + try: + return return_value, result[self.return_raw] + except KeyError: + return return_value, [np.nan] return return_value @@ -146,28 +222,59 @@ def get_bounds_x0_eps(self): eps = min(eps, np.amin(np.gradient(vals))) self.tuning_options["eps"] = eps - logging.debug('get_bounds_x0_eps called') - logging.debug('bounds ' + str(bounds)) - logging.debug('x0 ' + str(x0)) - logging.debug('eps ' + str(eps)) + logging.debug("get_bounds_x0_eps called") + logging.debug("bounds %s", str(bounds)) + logging.debug("x0 %s", str(x0)) + logging.debug("eps %s", str(eps)) return bounds, x0, eps def get_bounds(self): """Create a bounds array from the tunable parameters.""" bounds = [] - for values in self.searchspace.tune_params.values(): - sorted_values = np.sort(values) - bounds.append((sorted_values[0], sorted_values[-1])) + for values in self.encoded_params_values if self.encode_non_numeric else self.searchspace.params_values: + bounds.append((min(values), max(values))) return bounds + def encoded_to_params(self, config): + """Convert from an encoded configuration to the real parameters.""" + if not self.encode_non_numeric: + raise ValueError("'encode_non_numeric' must be set to true to use this function.") + params = [] + for i, v in enumerate(config): + # params.append(self._map_encoded_to_param[i][v] if i in self._map_encoded_to_param else v) + if i in self._map_encoded_to_param: + encoding = self._map_encoded_to_param[i] + if 
v in encoding: + param = encoding[v] + elif isinstance(v, float): + # try to resolve a rounding error due to floating point arithmetic / continous solver + param = encoding[round(v)] + else: + raise ValueError(f"Encoded value {v} not found in {self._map_encoded_to_param[i]}") + else: + param = v + params.append(param) + assert len(params) == len(config) + return params + + def params_to_encoded(self, config): + """Convert from a parameter configuration to the encoded configuration.""" + if not self.encode_non_numeric: + raise ValueError("'encode_non_numeric' must be set to true to use this function.") + encoded = [] + for i, v in enumerate(config): + encoded.append(self._map_param_to_encoded[i][v] if i in self._map_param_to_encoded else v) + assert len(encoded) == len(config) + return encoded + def setup_method_arguments(method, bounds): """Prepare method specific arguments.""" kwargs = {} # pass bounds to methods that support it if method in ["L-BFGS-B", "TNC", "SLSQP"]: - kwargs['bounds'] = bounds + kwargs["bounds"] = bounds return kwargs @@ -180,21 +287,21 @@ def setup_method_options(method, tuning_options): maxiter = tuning_options.strategy_options.maxiter else: maxiter = 100 - kwargs['maxiter'] = maxiter + kwargs["maxiter"] = maxiter if method in ["Nelder-Mead", "Powell"]: - kwargs['maxfev'] = maxiter + kwargs["maxfev"] = maxiter elif method == "L-BFGS-B": - kwargs['maxfun'] = maxiter + kwargs["maxfun"] = maxiter # pass eps to methods that support it if method in ["CG", "BFGS", "L-BFGS-B", "TNC", "SLSQP"]: - kwargs['eps'] = tuning_options.eps + kwargs["eps"] = tuning_options.eps elif method == "COBYLA": - kwargs['rhobeg'] = tuning_options.eps + kwargs["rhobeg"] = tuning_options.eps # not all methods support 'disp' option - if method not in ['TNC']: - kwargs['disp'] = tuning_options.verbose + if method not in ["TNC"]: + kwargs["disp"] = tuning_options.verbose return kwargs @@ -241,5 +348,30 @@ def scale_from_params(params, tune_params, eps): """Helper func to 
do the inverse of the 'unscale' function.""" x = np.zeros(len(params)) for i, v in enumerate(tune_params.values()): - x[i] = 0.5 * eps + v.index(params[i])*eps + x[i] = 0.5 * eps + v.index(params[i]) * eps return x + + + +def unscale_and_snap_to_nearest_valid(x, params, searchspace, eps): + """Helper func to snap to the nearest valid configuration""" + + # params is nearest unscaled point, but is not valid + neighbors = get_neighbors(params, searchspace) + + if neighbors: + # sort on distance to x + neighbors.sort(key=lambda y: distance.euclidean(x,scale_from_params(y, searchspace.tune_params, eps))) + + # return closest valid neighbor + return neighbors[0] + + return [] + + +def get_neighbors(params, searchspace): + for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]: + neighbors = searchspace.get_neighbors_no_cache(tuple(params), neighbor_method=neighbor_method) + if len(neighbors) > 0: + return neighbors + return [] diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py index 5ad2b9474..d77772992 100644 --- a/kernel_tuner/strategies/diff_evo.py +++ b/kernel_tuner/strategies/diff_evo.py @@ -1,14 +1,15 @@ """The differential evolution strategy that optimizes the search through the parameter space.""" from scipy.optimize import differential_evolution -from kernel_tuner import util +from kernel_tuner.util import StopCriterionReached from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc -supported_methods = ["best1bin", "best1exp", "rand1exp", "randtobest1exp", "best2exp", "rand2exp", "randtobest1bin", "best2bin", "rand2bin", "rand1bin"] +supported_methods = ["best1bin", "best1exp", "rand1exp", "randtobest1exp", "best2exp", "rand2exp", + "randtobest1bin", "best2bin", "rand2bin", "rand1bin"] -_options = dict(method=(f"Creation method for new population, any of {supported_methods}", "best1bin"), +_options = dict(method=(f"Creation 
method for new population, any of {supported_methods}", "randtobest1bin"), popsize=("Population size", 20), maxiter=("Number of generations", 100)) @@ -18,19 +19,20 @@ def tune(searchspace: Searchspace, runner, tuning_options): method, popsize, maxiter = common.get_options(tuning_options.strategy_options, _options) - # build a bounds array as needed for the optimizer - cost_func = CostFunc(searchspace, tuning_options, runner) + # build a bounds array as needed for the optimizer, and encode because it can't handle non-numeric values + cost_func = CostFunc(searchspace, tuning_options, runner, encode_non_numeric=True) bounds = cost_func.get_bounds() # ensure particles start from legal points population = list(list(p) for p in searchspace.get_random_sample(popsize)) + population_enc = [cost_func.params_to_encoded(c) for c in population] # call the differential evolution optimizer opt_result = None try: - opt_result = differential_evolution(cost_func, bounds, maxiter=maxiter, popsize=popsize, init=population, + opt_result = differential_evolution(cost_func, bounds, maxiter=maxiter, popsize=popsize, init=population_enc, polish=False, strategy=method, disp=tuning_options.verbose) - except util.StopCriterionReached as e: + except StopCriterionReached as e: if tuning_options.verbose: print(e) diff --git a/kernel_tuner/strategies/dual_annealing.py b/kernel_tuner/strategies/dual_annealing.py index 0f44bd849..598151ea5 100644 --- a/kernel_tuner/strategies/dual_annealing.py +++ b/kernel_tuner/strategies/dual_annealing.py @@ -1,18 +1,19 @@ """The strategy that uses the dual annealing optimization method.""" import scipy.optimize -from kernel_tuner import util +from kernel_tuner.util import StopCriterionReached from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc, setup_method_arguments, setup_method_options supported_methods = ['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 
'BFGS', 'trust-constr'] -_options = dict(method=(f"Local optimization method to use, choose any from {supported_methods}", "Powell")) +_options = dict(method=(f"Local optimization method to use, choose any from {supported_methods}", "COBYLA")) def tune(searchspace: Searchspace, runner, tuning_options): - method = common.get_options(tuning_options.strategy_options, _options)[0] + _options["max_fevals"] = ("", searchspace.size) + method, max_fevals = common.get_options(tuning_options.strategy_options, _options) #scale variables in x to make 'eps' relevant for multiple variables cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True) @@ -29,8 +30,8 @@ def tune(searchspace: Searchspace, runner, tuning_options): opt_result = None try: - opt_result = scipy.optimize.dual_annealing(cost_func, bounds, minimizer_kwargs=minimizer_kwargs, x0=x0) - except util.StopCriterionReached as e: + opt_result = scipy.optimize.dual_annealing(cost_func, bounds, minimizer_kwargs=minimizer_kwargs, x0=x0, maxfun=max_fevals) + except StopCriterionReached as e: if tuning_options.verbose: print(e) diff --git a/kernel_tuner/strategies/firefly_algorithm.py b/kernel_tuner/strategies/firefly_algorithm.py index dc43aae6f..7ebe33a15 100644 --- a/kernel_tuner/strategies/firefly_algorithm.py +++ b/kernel_tuner/strategies/firefly_algorithm.py @@ -3,7 +3,7 @@ import numpy as np -from kernel_tuner import util +from kernel_tuner.util import StopCriterionReached from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc, scale_from_params @@ -13,7 +13,8 @@ maxiter=("Maximum number of iterations", 100), B0=("Maximum attractiveness", 1.0), gamma=("Light absorption coefficient", 1.0), - alpha=("Randomization parameter", 0.2)) + alpha=("Randomization parameter", 0.2), + constraint_aware=("constraint-aware optimization (True/False)", True)) def tune(searchspace: Searchspace, runner, tuning_options): @@ -23,7 +24,7 
@@ def tune(searchspace: Searchspace, runner, tuning_options): # using this instead of get_bounds because scaling is used bounds, _, eps = cost_func.get_bounds_x0_eps() - num_particles, maxiter, B0, gamma, alpha = common.get_options(tuning_options.strategy_options, _options) + num_particles, maxiter, B0, gamma, alpha, constraint_aware = common.get_options(tuning_options.strategy_options, _options) best_score_global = sys.float_info.max best_position_global = [] @@ -34,15 +35,16 @@ def tune(searchspace: Searchspace, runner, tuning_options): swarm.append(Firefly(bounds)) # ensure particles start from legal points - population = list(list(p) for p in searchspace.get_random_sample(num_particles)) - for i, particle in enumerate(swarm): - particle.position = scale_from_params(population[i], searchspace.tune_params, eps) + if constraint_aware: + population = list(list(p) for p in searchspace.get_random_sample(num_particles)) + for i, particle in enumerate(swarm): + particle.position = scale_from_params(population[i], searchspace.tune_params, eps) # compute initial intensities for j in range(num_particles): try: swarm[j].compute_intensity(cost_func) - except util.StopCriterionReached as e: + except StopCriterionReached as e: if tuning_options.verbose: print(e) return cost_func.results @@ -65,7 +67,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): swarm[i].move_towards(swarm[j], beta, alpha) try: swarm[i].compute_intensity(cost_func) - except util.StopCriterionReached as e: + except StopCriterionReached as e: if tuning_options.verbose: print(e) return cost_func.results diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py index c29c150b5..27f07e8db 100644 --- a/kernel_tuner/strategies/genetic_algorithm.py +++ b/kernel_tuner/strategies/genetic_algorithm.py @@ -1,40 +1,52 @@ """A simple genetic algorithm for parameter search.""" + import random import numpy as np -from kernel_tuner import util +from 
kernel_tuner.util import StopCriterionReached, get_best_config from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc _options = dict( popsize=("population size", 20), - maxiter=("maximum number of generations", 100), - method=("crossover method to use, choose any from single_point, two_point, uniform, disruptive_uniform", "uniform"), - mutation_chance=("chance to mutate is 1 in mutation_chance", 10), + maxiter=("maximum number of generations", 150), + method=("crossover method to use, choose any from single_point, two_point, uniform, disruptive_uniform", "single_point"), + mutation_chance=("chance to mutate is 1 in mutation_chance", 5), + constraint_aware=("constraint-aware optimization (True/False)", True), ) def tune(searchspace: Searchspace, runner, tuning_options): options = tuning_options.strategy_options - pop_size, generations, method, mutation_chance = common.get_options(options, _options) - crossover = supported_methods[method] + pop_size, generations, method, mutation_chance, constraint_aware = common.get_options(options, _options) + + # if necessary adjust the popsize to a sensible value based on search space size + if pop_size < 2 or pop_size > np.floor(searchspace.size / 2): + pop_size = min(max(round((searchspace.size / generations) * 3), 2), pop_size) + + GA = GeneticAlgorithm(pop_size, searchspace, method, mutation_chance, constraint_aware) best_score = 1e20 cost_func = CostFunc(searchspace, tuning_options, runner) + num_evaluated = 0 - population = list(list(p) for p in searchspace.get_random_sample(pop_size)) + population = GA.generate_population() for generation in range(generations): + if any([not searchspace.is_param_config_valid(tuple(dna)) for dna in population]): + raise ValueError(f"Generation {generation}/{generations}, population validity: {[searchspace.is_param_config_valid(tuple(dna)) for dna in population]}") # determine fitness of population 
members weighted_population = [] for dna in population: try: - time = cost_func(dna, check_restrictions=False) - except util.StopCriterionReached as e: + # if we are not constraint-aware we should check restrictions upon evaluation + time = cost_func(dna, check_restrictions=not constraint_aware) + num_evaluated += 1 + except StopCriterionReached as e: if tuning_options.verbose: print(e) return cost_func.results @@ -46,21 +58,24 @@ def tune(searchspace: Searchspace, runner, tuning_options): # 'best_score' is used only for printing if tuning_options.verbose and cost_func.results: - best_score = util.get_best_config(cost_func.results, tuning_options.objective, tuning_options.objective_higher_is_better)[tuning_options.objective] + best_score = get_best_config( + cost_func.results, tuning_options.objective, tuning_options.objective_higher_is_better + )[tuning_options.objective] if tuning_options.verbose: print("Generation %d, best_score %f" % (generation, best_score)) + # build new population for next generation population = [] # crossover and mutate - while len(population) < pop_size: - dna1, dna2 = weighted_choice(weighted_population, 2) + while len(population) < pop_size and searchspace.size > num_evaluated + len(population): + dna1, dna2 = GA.weighted_choice(weighted_population, 2) - children = crossover(dna1, dna2) + children = GA.crossover(dna1, dna2) for child in children: - child = mutate(child, mutation_chance, searchspace) + child = GA.mutate(child) if child not in population and searchspace.is_param_config_valid(tuple(child)): population.append(child) @@ -75,57 +90,117 @@ def tune(searchspace: Searchspace, runner, tuning_options): tune.__doc__ = common.get_strategy_docstring("Genetic Algorithm", _options) +class GeneticAlgorithm: + + def __init__(self, pop_size, searchspace, method="uniform", mutation_chance=10, constraint_aware=True): + self.pop_size = pop_size + self.searchspace = searchspace + self.tune_params = searchspace.tune_params.copy() + 
self.crossover_method = supported_methods[method] + self.mutation_chance = mutation_chance + self.constraint_aware = constraint_aware -def weighted_choice(population, n): - """Randomly select n unique individuals from a weighted population, fitness determines probability of being selected.""" - - def random_index_betavariate(pop_size): - # has a higher probability of returning index of item at the head of the list - alpha = 1 - beta = 2.5 - return int(random.betavariate(alpha, beta) * pop_size) - - def random_index_weighted(pop_size): - """Use weights to increase probability of selection.""" - weights = [w for _, w in population] - # invert because lower is better - inverted_weights = [1.0 / w for w in weights] - prefix_sum = np.cumsum(inverted_weights) - total_weight = sum(inverted_weights) - randf = random.random() * total_weight - # return first index of prefix_sum larger than random number - return next(i for i, v in enumerate(prefix_sum) if v > randf) - - random_index = random_index_betavariate - - indices = [random_index(len(population)) for _ in range(n)] - chosen = [] - for ind in indices: - while ind in chosen: - ind = random_index(len(population)) - chosen.append(ind) - - return [population[ind][0] for ind in chosen] - - -def mutate(dna, mutation_chance, searchspace: Searchspace, cache=True): - """Mutate DNA with 1/mutation_chance chance.""" - # this is actually a neighbors problem with Hamming distance, choose randomly from returned searchspace list - if int(random.random() * mutation_chance) == 0: - if cache: - neighbors = searchspace.get_neighbors(tuple(dna), neighbor_method="Hamming") + def generate_population(self): + """ Constraint-aware population creation method """ + if self.constraint_aware: + pop = list(list(p) for p in self.searchspace.get_random_sample(self.pop_size)) else: - neighbors = searchspace.get_neighbors_no_cache(tuple(dna), neighbor_method="Hamming") - if len(neighbors) > 0: - return list(random.choice(neighbors)) - return dna + pop 
= [] + dna_size = len(self.tune_params) + for _ in range(self.pop_size): + dna = [] + for key in self.tune_params: + dna.append(random.choice(self.tune_params[key])) + pop.append(dna) + return pop + + def crossover(self, dna1, dna2): + """ Apply selected crossover method, repair dna if constraint-aware """ + dna1, dna2 = self.crossover_method(dna1, dna2) + if self.constraint_aware: + return self.repair(dna1), self.repair(dna2) + return dna1, dna2 + + def weighted_choice(self, population, n): + """Randomly select n unique individuals from a weighted population, fitness determines probability of being selected.""" + + def random_index_betavariate(pop_size): + # has a higher probability of returning index of item at the head of the list + alpha = 1 + beta = 2.5 + return int(random.betavariate(alpha, beta) * pop_size) + + def random_index_weighted(pop_size): + """Use weights to increase probability of selection.""" + weights = [w for _, w in population] + # invert because lower is better + inverted_weights = [1.0 / w for w in weights] + prefix_sum = np.cumsum(inverted_weights) + total_weight = sum(inverted_weights) + randf = random.random() * total_weight + # return first index of prefix_sum larger than random number + return next(i for i, v in enumerate(prefix_sum) if v > randf) + + random_index = random_index_betavariate + + indices = [random_index(len(population)) for _ in range(n)] + chosen = [] + for ind in indices: + while ind in chosen: + ind = random_index(len(population)) + chosen.append(ind) + + return [population[ind][0] for ind in chosen] + + + def mutate(self, dna, cache=False): + """Mutate DNA with 1/mutation_chance chance.""" + # this is actually a neighbors problem with Hamming distance, choose randomly from returned searchspace list + if int(random.random() * self.mutation_chance) == 0: + if self.constraint_aware: + if cache: + neighbors = self.searchspace.get_neighbors(tuple(dna), neighbor_method="Hamming") + else: + neighbors = 
self.searchspace.get_neighbors_no_cache(tuple(dna), neighbor_method="Hamming") + if len(neighbors) > 0: + return list(random.choice(neighbors)) + else: + # select a tunable parameter at random + mutate_index = random.randint(0, len(self.tune_params)-1) + mutate_key = list(self.tune_params.keys())[mutate_index] + # get all possible values for this parameter and remove current value + new_val_options = self.tune_params[mutate_key].copy() + new_val_options.remove(dna[mutate_index]) + # pick new value at random + if len(new_val_options) > 0: + new_val = random.choice(new_val_options) + dna[mutate_index] = new_val + return dna + + + def repair(self, dna): + """ It is possible that crossover methods yield a configuration that is not valid. """ + if not self.searchspace.is_param_config_valid(tuple(dna)): + # dna is not valid, try to repair it + # search for valid configurations neighboring this config + # start from strictly-adjacent to increasingly allowing more neighbors + for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]: + neighbors = self.searchspace.get_neighbors_no_cache(tuple(dna), neighbor_method=neighbor_method) + + # if we have found valid neighboring configurations, select one at random + if len(neighbors) > 0: + new_dna = list(random.choice(neighbors)) + # print(f"GA crossover resulted in invalid config {dna=}, repaired dna to {new_dna=}") + return new_dna + + return dna def single_point_crossover(dna1, dna2): """Crossover dna1 and dna2 at a random index.""" # check if you can do the crossovers using the neighbor index: check which valid parameter configuration is closest to the crossover, probably best to use "adjacent" as it is least strict? 
pos = int(random.random() * (len(dna1))) - return (dna1[:pos] + dna2[pos:], dna2[:pos] + dna1[pos:]) + return dna1[:pos] + dna2[pos:], dna2[:pos] + dna1[pos:] def two_point_crossover(dna1, dna2): @@ -137,7 +212,7 @@ def two_point_crossover(dna1, dna2): pos1, pos2 = sorted(random.sample(list(range(start, end)), 2)) child1 = dna1[:pos1] + dna2[pos1:pos2] + dna1[pos2:] child2 = dna2[:pos1] + dna1[pos1:pos2] + dna2[pos2:] - return (child1, child2) + return child1, child2 def uniform_crossover(dna1, dna2): @@ -168,7 +243,7 @@ def disruptive_uniform_crossover(dna1, dna2): child1[ind] = dna2[ind] child2[ind] = dna1[ind] swaps += 1 - return (child1, child2) + return child1, child2 supported_methods = { @@ -177,3 +252,4 @@ def disruptive_uniform_crossover(dna1, dna2): "uniform": uniform_crossover, "disruptive_uniform": disruptive_uniform_crossover, } + diff --git a/kernel_tuner/strategies/greedy_ils.py b/kernel_tuner/strategies/greedy_ils.py index a4c521746..51a3c784e 100644 --- a/kernel_tuner/strategies/greedy_ils.py +++ b/kernel_tuner/strategies/greedy_ils.py @@ -1,9 +1,10 @@ """A simple greedy iterative local search algorithm for parameter search.""" -from kernel_tuner import util +from random import choice as random_choice + +from kernel_tuner.util import StopCriterionReached from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc -from kernel_tuner.strategies.genetic_algorithm import mutate from kernel_tuner.strategies.hillclimbers import base_hillclimb _options = dict(neighbor=("Method for selecting neighboring nodes, choose from Hamming or adjacent", "Hamming"), @@ -40,7 +41,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): try: candidate = base_hillclimb(candidate, neighbor, max_fevals, searchspace, tuning_options, cost_func, restart=restart, randomize=True) new_score = cost_func(candidate, check_restrictions=False) - except util.StopCriterionReached as e: + except 
def mutate(indiv, searchspace: "Searchspace"):
    """Return a random Hamming-distance-1 neighbour of *indiv* as a list.

    Falls back to a random sample when the configuration has no neighbours,
    so callers never hit an IndexError from choosing out of an empty list.
    """
    neighbors = searchspace.get_neighbors_no_cache(tuple(indiv), neighbor_method="Hamming")
    if not neighbors:
        # no legal neighbours: restart from a random configuration instead of crashing
        return list(searchspace.get_random_sample(1)[0])
    return list(random_choice(neighbors))


def random_walk(indiv, permutation_size, no_improve, last_improve, searchspace: "Searchspace"):
    """Perform a random walk of *permutation_size* mutations starting from *indiv*.

    If the search has not improved for *no_improve* evaluations, restart from a
    random configuration instead of walking.
    """
    if last_improve >= no_improve:
        # restart path previously returned a tuple while the walk path returns a
        # list; normalize to list so callers always get a consistent type
        return list(searchspace.get_random_sample(1)[0])
    for _ in range(permutation_size):
        indiv = mutate(indiv, searchspace)
    return indiv
kernel_tuner.strategies.common import CostFunc def base_hillclimb(base_sol: tuple, neighbor_method: str, max_fevals: int, searchspace: Searchspace, tuning_options, cost_func: CostFunc, restart=True, randomize=True, order=None): - """ Hillclimbing search until max_fevals is reached or no improvement is found + """Hillclimbing search until max_fevals is reached or no improvement is found. Base hillclimber that evaluates neighbouring solutions in a random or fixed order and possibly immediately moves to the neighbour if it is an improvement. @@ -51,6 +50,9 @@ def base_hillclimb(base_sol: tuple, neighbor_method: str, max_fevals: int, searc """ if randomize and order: raise ValueError("Using a preset order and randomize at the same time is not supported.") + + # limit max_fevals to max size of the parameter space + max_fevals = min(searchspace.size, max_fevals) tune_params = searchspace.tune_params diff --git a/kernel_tuner/strategies/minimize.py b/kernel_tuner/strategies/minimize.py index 80c1c6f82..71929a040 100644 --- a/kernel_tuner/strategies/minimize.py +++ b/kernel_tuner/strategies/minimize.py @@ -2,7 +2,7 @@ import scipy.optimize -from kernel_tuner import util +from kernel_tuner.util import StopCriterionReached from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies.common import ( CostFunc, @@ -30,7 +30,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): opt_result = None try: opt_result = scipy.optimize.minimize(cost_func, x0, method=method, options=options, **kwargs) - except util.StopCriterionReached as e: + except StopCriterionReached as e: if tuning_options.verbose: print(e) diff --git a/kernel_tuner/strategies/pso.py b/kernel_tuner/strategies/pso.py index 5b0df1429..a02aed1c5 100644 --- a/kernel_tuner/strategies/pso.py +++ b/kernel_tuner/strategies/pso.py @@ -1,30 +1,33 @@ """The strategy that uses particle swarm optimization.""" + import random import sys import numpy as np -from kernel_tuner import util +from 
kernel_tuner.util import StopCriterionReached from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc, scale_from_params -_options = dict(popsize=("Population size", 20), - maxiter=("Maximum number of iterations", 100), - w=("Inertia weight constant", 0.5), - c1=("Cognitive constant", 2.0), - c2=("Social constant", 1.0)) +_options = dict( + popsize=("Population size", 30), + maxiter=("Maximum number of iterations", 100), + w=("Inertia weight constant", 0.5), + c1=("Cognitive constant", 3.0), + c2=("Social constant", 0.5), + constraint_aware=("constraint-aware optimization (True/False)", True)) def tune(searchspace: Searchspace, runner, tuning_options): - #scale variables in x because PSO works with velocities to visit different configurations + # scale variables in x because PSO works with velocities to visit different configurations cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True) - #using this instead of get_bounds because scaling is used + # using this instead of get_bounds because scaling is used bounds, _, eps = cost_func.get_bounds_x0_eps() - - num_particles, maxiter, w, c1, c2 = common.get_options(tuning_options.strategy_options, _options) + num_particles, maxiter, w, c1, c2, constraint_aware = common.get_options(tuning_options.strategy_options, _options) + num_particles = min(round(searchspace.size / 2), num_particles) best_score_global = sys.float_info.max best_position_global = [] @@ -35,9 +38,10 @@ def tune(searchspace: Searchspace, runner, tuning_options): swarm.append(Particle(bounds)) # ensure particles start from legal points - population = list(list(p) for p in searchspace.get_random_sample(num_particles)) - for i, particle in enumerate(swarm): - particle.position = scale_from_params(population[i], searchspace.tune_params, eps) + if constraint_aware: + population = list(list(p) for p in searchspace.get_random_sample(num_particles)) + for i, 
particle in enumerate(swarm): + particle.position = scale_from_params(population[i], searchspace.tune_params, eps) # start optimization for i in range(maxiter): @@ -48,7 +52,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): for j in range(num_particles): try: swarm[j].evaluate(cost_func) - except util.StopCriterionReached as e: + except StopCriterionReached as e: if tuning_options.verbose: print(e) return cost_func.results @@ -64,7 +68,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): swarm[j].update_position(bounds) if tuning_options.verbose: - print('Final result:') + print("Final result:") print(best_position_global) print(best_score_global) @@ -73,6 +77,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): tune.__doc__ = common.get_strategy_docstring("Particle Swarm Optimization (PSO)", _options) + class Particle: def __init__(self, bounds): self.ndim = len(bounds) diff --git a/kernel_tuner/strategies/random_sample.py b/kernel_tuner/strategies/random_sample.py index 022eda534..57eaac6cc 100644 --- a/kernel_tuner/strategies/random_sample.py +++ b/kernel_tuner/strategies/random_sample.py @@ -1,7 +1,7 @@ """Iterate over a random sample of the parameter space.""" import numpy as np -from kernel_tuner import util +from kernel_tuner.util import StopCriterionReached from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc @@ -17,7 +17,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): # override if max_fevals is specified if "max_fevals" in tuning_options: - num_samples = tuning_options.max_fevals + num_samples = min(tuning_options.max_fevals, searchspace.size) samples = searchspace.get_random_sample(num_samples) @@ -26,7 +26,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): for sample in samples: try: cost_func(sample, check_restrictions=False) - except util.StopCriterionReached as e: + except 
StopCriterionReached as e: if tuning_options.verbose: print(e) return cost_func.results diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py index dce929b7b..d01ba7e4f 100644 --- a/kernel_tuner/strategies/simulated_annealing.py +++ b/kernel_tuner/strategies/simulated_annealing.py @@ -4,22 +4,24 @@ import numpy as np -from kernel_tuner import util +from kernel_tuner.util import StopCriterionReached from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc -_options = dict(T=("Starting temperature", 1.0), - T_min=("End temperature", 0.001), - alpha=("Alpha parameter", 0.995), - maxiter=("Number of iterations within each annealing step", 1)) + +_options = dict(T=("Starting temperature", 0.5), + T_min=("End temperature", 0.001), + alpha=("Alpha parameter", 0.9975), + maxiter=("Number of iterations within each annealing step", 2), + constraint_aware=("constraint-aware optimization (True/False)", True)) def tune(searchspace: Searchspace, runner, tuning_options): # SA works with real parameter values and does not need scaling cost_func = CostFunc(searchspace, tuning_options, runner) # optimization parameters - T, T_min, alpha, niter = common.get_options(tuning_options.strategy_options, _options) + T, T_min, alpha, niter, constraint_aware = common.get_options(tuning_options.strategy_options, _options) T_start = T # compute how many iterations would be needed to complete the annealing schedule @@ -27,10 +29,13 @@ def tune(searchspace: Searchspace, runner, tuning_options): # if user supplied max_fevals that is lower then max_iter we will # scale the annealing schedule to fit max_fevals - max_feval = tuning_options.strategy_options.get("max_fevals", max_iter) + max_fevals = tuning_options.strategy_options.get("max_fevals", max_iter) + + # limit max_fevals to max size of the parameter space + max_fevals = min(searchspace.size, 
def neighbor(pos, searchspace: "Searchspace", constraint_aware=True):
    """Return a random neighbor of pos.

    When *constraint_aware* is True, neighbours are drawn from the searchspace,
    so every suggested configuration satisfies the restrictions. Otherwise a
    neighbour is built per-dimension from the raw tune_params, which may
    violate restrictions (the caller is expected to check them).
    """
    if constraint_aware:
        # Note: this is not the same as the previous implementation, because it is
        # possible that non-edge parameters remain the same, but suggested
        # configurations will all be within restrictions
        method = 'Hamming' if random.random() < 0.2 else 'strictly-adjacent'
        neighbors = searchspace.get_neighbors(tuple(pos), neighbor_method=method)
        if len(neighbors) > 0:
            return list(random.choice(neighbors))
        # if there are no neighbors, return a random configuration
        return list(searchspace.get_random_sample(1)[0])

    tune_params = searchspace.tune_params
    keys = list(tune_params.keys())  # hoisted: was rebuilt on every loop iteration
    pos_out = []
    # per dimension: with probability 0.2 jump to a random value, otherwise
    # step to an adjacent value (clamped to the ends of the value list), so
    # in expectation every dimension gets a chance to mutate
    for i in range(len(pos)):
        values = tune_params[keys[i]]
        if random.random() < 0.2:
            # replace with a random value
            new_value = random_val(i, tune_params)
        else:
            # move to an adjacent value
            ind = values.index(pos[i])
            ind += 1 if random.random() > 0.5 else -1
            ind = min(max(ind, 0), len(values) - 1)
            new_value = values[ind]
        pos_out.append(new_value)
    return pos_out


def random_val(index, tune_params):
    """Return a random value for the parameter at *index* in *tune_params*."""
    key = list(tune_params.keys())[index]
    return random.choice(tune_params[key])


def generate_starting_point(searchspace: "Searchspace", constraint_aware=True):
    """Return a random starting configuration as a list.

    Constraint-aware sampling draws from the legal searchspace; otherwise each
    parameter value is drawn independently (and may violate restrictions).
    """
    if constraint_aware:
        return list(searchspace.get_random_sample(1)[0])
    tune_params = searchspace.tune_params
    return [random_val(i, tune_params) for i in range(len(tune_params))]
def get_interval(a: list):
    """Checks if an array can be an interval. Returns (start, end, step) if interval, otherwise None."""
    if len(a) < 3:
        return None
    # only numeric values can form an interval; bool is excluded explicitly
    # because it is a subclass of int (consistent with the bool check used
    # when deciding on numeric constraints elsewhere in this module)
    if not all(isinstance(e, (int, float)) and not isinstance(e, bool) for e in a):
        return None
    a_min = min(a)
    a_max = max(a)
    # determine the first step size
    step = a[1] - a_min
    # every consecutive pair of elements must have the same step
    # (bug fix: the previous loop started at enumerate index 0, which compared
    # a[0] against a[-1] and therefore rejected every genuine interval)
    for prev, cur in zip(a, a[1:]):
        if cur - prev != step:
            return None
    return (a_min, a_max, step)
:type kernel_source: string or callable :param params: Dictionary containing the tunable parameters for this specific @@ -488,6 +515,8 @@ def get_kernel_string(kernel_source, params=None): kernel_string = None if callable(kernel_source): kernel_string = kernel_source(params) + elif isinstance(kernel_source, Path): + kernel_string = read_file(kernel_source) elif isinstance(kernel_source, str): if looks_like_a_filename(kernel_source): kernel_string = read_file(kernel_source) or kernel_source @@ -779,7 +808,10 @@ def prepare_kernel_string(kernel_name, kernel_string, params, grid, threads, blo def read_file(filename): """Return the contents of the file named filename or None if file not found.""" - if os.path.isfile(filename): + if isinstance(filename, Path): + with filename.open() as f: + return f.read() + elif os.path.isfile(filename): with open(filename, "r") as f: return f.read() @@ -841,7 +873,7 @@ def has_kw_argument(func, name): def parse_restrictions( - restrictions: list[str], tune_params: dict, monolithic=False, try_to_constraint=True + restrictions: list[str], tune_params: dict, monolithic=False, format=None, try_to_constraint=True ) -> list[tuple[Union[Constraint, str], list[str]]]: """Parses restrictions from a list of strings into compilable functions and constraints, or a single compilable function (if monolithic is True). 
Returns a list of tuples of (strings or constraints) and parameters.""" # rewrite the restrictions so variables are singled out @@ -849,7 +881,7 @@ def parse_restrictions( def replace_params(match_object): key = match_object.group(1) - if key in tune_params: + if key in tune_params and format != "pyatf": param = str(key) return "params[params_index['" + param + "']]" else: @@ -1019,6 +1051,15 @@ def to_equality_constraint( return ValueError(f"Not possible: comparator should be '==' or '!=', is {comparator}") return None + # remove functionally duplicate restrictions (preserves order and whitespace) + if all(isinstance(r, str) for r in restrictions): + # clean the restriction strings to functional equivalence + restrictions_cleaned = [r.replace(" ", "") for r in restrictions] + restrictions_cleaned_unique = list(dict.fromkeys(restrictions_cleaned)) # dict preserves order + # get the indices of the unique restrictions, use these to build a new list of restrictions + restrictions_unique_indices = [restrictions_cleaned.index(r) for r in restrictions_cleaned_unique] + restrictions = [restrictions[i] for i in restrictions_unique_indices] + # create the parsed restrictions if monolithic is False: # split into multiple restrictions where possible @@ -1041,14 +1082,49 @@ def to_equality_constraint( ): parsed_restriction = parsed_restriction[1:-1] # check if we can turn this into the built-in numeric comparison constraint - finalized_constraint = to_numeric_constraint(parsed_restriction, params_used) + if all( + all(isinstance(v, (int, float)) and type(v) is not type(True) for v in tune_params[param]) + for param in params_used + ): + finalized_constraint = to_numeric_constraint(parsed_restriction, params_used) if finalized_constraint is None: # check if we can turn this into the built-in equality comparison constraint finalized_constraint = to_equality_constraint(parsed_restriction, params_used) if finalized_constraint is None: # we must turn it into a general function - 
finalized_constraint = f"def r({', '.join(params_used)}): return {parsed_restriction} \n" + if format is not None and format.lower() == "pyatf": + finalized_constraint = parsed_restriction + else: + finalized_constraint = f"def r({', '.join(params_used)}): return {parsed_restriction} \n" parsed_restrictions.append((finalized_constraint, params_used)) + + # if pyATF, restrictions that are set on the same parameter must be combined into one + if format is not None and format.lower() == "pyatf": + res_dict = dict() + registered_params = list() + registered_restrictions = list() + parsed_restrictions_pyatf = list() + for param in tune_params.keys(): + registered_params.append(param) + for index, (res, params) in enumerate(parsed_restrictions): + if index in registered_restrictions: + continue + if all(p in registered_params for p in params): + if param not in res_dict: + res_dict[param] = (list(), list()) + res_dict[param][0].append(res) + res_dict[param][1].extend(params) + registered_restrictions.append(index) + # combine multiple restrictions into one + for res_tuple in res_dict.values(): + res, params_used = res_tuple + params_used = list( + dict.fromkeys(params_used) + ) # param_used should only contain unique, dict preserves order + parsed_restrictions_pyatf.append( + (f"def r({', '.join(params_used)}): return ({') and ('.join(res)}) \n", params_used) + ) + parsed_restrictions = parsed_restrictions_pyatf else: # create one monolithic function parsed_restrictions = ") and (".join( @@ -1062,20 +1138,28 @@ def to_equality_constraint( # provide a mapping of the parameter names to the index in the tuple received params_index = dict(zip(tune_params.keys(), range(len(tune_params.keys())))) - parsed_restrictions = [ - ( - f"def restrictions(*params): params_index = {params_index}; return {parsed_restrictions} \n", - list(tune_params.keys()), - ) - ] + if format == "pyatf": + parsed_restrictions = [ + ( + f"def restrictions({', '.join(params_index.keys())}): return 
{parsed_restrictions} \n", + list(tune_params.keys()), + ) + ] + else: + parsed_restrictions = [ + ( + f"def restrictions(*params): params_index = {params_index}; return {parsed_restrictions} \n", + list(tune_params.keys()), + ) + ] return parsed_restrictions def compile_restrictions( - restrictions: list, tune_params: dict, monolithic=False, try_to_constraint=True -) -> list[tuple[Union[str, Constraint, FunctionType], list[str]]]: - """Parses restrictions from a list of strings into a list of strings, Functions, or Constraints (if `try_to_constraint`) and parameters used, or a single Function if monolithic is true.""" + restrictions: list, tune_params: dict, monolithic=False, format=None, try_to_constraint=True +) -> list[tuple[Union[str, Constraint, FunctionType], list[str], Union[str, None]]]: + """Parses restrictions from a list of strings into a list of strings, Functions, or Constraints (if `try_to_constraint`) and parameters used and source, or a single Function if monolithic is true.""" # filter the restrictions to get only the strings restrictions_str, restrictions_ignore = [], [] for r in restrictions: @@ -1085,7 +1169,7 @@ def compile_restrictions( # parse the strings parsed_restrictions = parse_restrictions( - restrictions_str, tune_params, monolithic=monolithic, try_to_constraint=try_to_constraint + restrictions_str, tune_params, monolithic=monolithic, format=format, try_to_constraint=try_to_constraint ) # compile the parsed restrictions into a function @@ -1095,10 +1179,10 @@ def compile_restrictions( # if it's a string, parse it to a function code_object = compile(restriction, "", "exec") func = FunctionType(code_object.co_consts[0], globals()) - compiled_restrictions.append((func, params_used)) + compiled_restrictions.append((func, params_used, restriction)) elif isinstance(restriction, Constraint): # otherwise it already is a Constraint, pass it directly - compiled_restrictions.append((restriction, params_used)) + 
compiled_restrictions.append((restriction, params_used, None)) else: raise ValueError(f"Restriction {restriction} is neither a string or Constraint {type(restriction)}") @@ -1110,9 +1194,10 @@ def compile_restrictions( noncompiled_restrictions = [] for r in restrictions_ignore: if isinstance(r, tuple) and len(r) == 2 and isinstance(r[1], (list, tuple)): - noncompiled_restrictions.append(r) + restriction, params_used = r + noncompiled_restrictions.append((restriction, params_used, restriction)) else: - noncompiled_restrictions.append((r, ())) + noncompiled_restrictions.append((r, [], r)) return noncompiled_restrictions + compiled_restrictions @@ -1169,7 +1254,9 @@ def process_cache(cache, kernel_options, tuning_options, runner): # if file exists else: - cached_data = read_cache(cache) + cached_data = read_cache( + cache, not tuning_options.simulation_mode + ) # don't open the cache in (parallel) simulation mode to avoid race conditions # if in simulation mode, use the device name from the cache file as the runner device name if runner.simulation_mode: @@ -1177,10 +1264,17 @@ def process_cache(cache, kernel_options, tuning_options, runner): # check if it is safe to continue tuning from this cache if cached_data["device_name"] != runner.dev.name: - raise ValueError("Cannot load cache which contains results for different device") + raise ValueError( + f"Cannot load cache which contains results for different device (cache: {cached_data['device_name']}, actual: {runner.dev.name})" + ) if cached_data["kernel_name"] != kernel_options.kernel_name: - raise ValueError("Cannot load cache which contains results for different kernel") + raise ValueError( + f"Cannot load cache which contains results for different kernel (cache: {cached_data['kernel_name']}, actual: {kernel_options.kernel_name})" + ) if "problem_size" in cached_data and not callable(kernel_options.problem_size): + # if it's a single value, convert to an array + if isinstance(cached_data["problem_size"], int): + 
cached_data["problem_size"] = [cached_data["problem_size"]] # if problem_size is not iterable, compare directly if not hasattr(kernel_options.problem_size, "__iter__"): if cached_data["problem_size"] != kernel_options.problem_size: @@ -1189,7 +1283,9 @@ def process_cache(cache, kernel_options, tuning_options, runner): # cache returns list, problem_size is likely a tuple. Therefore, the next check # checks the equality of all items in the list/tuples individually elif not all([i == j for i, j in zip(cached_data["problem_size"], kernel_options.problem_size)]): - raise ValueError("Cannot load cache which contains results for different problem_size") + raise ValueError( + f"Cannot load cache which contains results for different problem_size ({cached_data['problem_size']=} != {kernel_options.problem_size=})" + ) if cached_data["tune_params_keys"] != list(tuning_options.tune_params.keys()): if all(key in tuning_options.tune_params for key in cached_data["tune_params_keys"]): raise ValueError( @@ -1212,7 +1308,7 @@ def correct_open_cache(cache, open_cache=True): filestr = cachefile.read().strip() # if file was not properly closed, pretend it was properly closed - if len(filestr) > 0 and not filestr[-3:] in ["}\n}", "}}}"]: + if len(filestr) > 0 and filestr[-3:] not in ["}\n}", "}}}"]: # remove the trailing comma if any, and append closing brackets if filestr[-1] == ",": filestr = filestr[:-1] diff --git a/noxfile.py b/noxfile.py index 38fc6680a..2770bc7f1 100644 --- a/noxfile.py +++ b/noxfile.py @@ -15,7 +15,7 @@ # set the test parameters verbose = False -python_versions_to_test = ["3.9", "3.10", "3.11", "3.12"] +python_versions_to_test = ["3.10", "3.11", "3.12", "3.13"] # 3.14 has not yet been officially released so is not tested against, but is allowed by the pyproject.toml nox.options.stop_on_first_error = True nox.options.error_on_missing_interpreters = True nox.options.default_venv_backend = 'virtualenv' @@ -85,7 +85,7 @@ def check_development_environment(session: 
Session) -> None: return None output: str = session.run("poetry", "install", "--sync", "--dry-run", "--with", "test", silent=True, external=True) match = re.search(r"Package operations: (\d+) (?:install|installs), (\d+) (?:update|updates), (\d+) (?:removal|removals), \d+ skipped", output) - assert match is not None, f"Invalid output: {output}" + assert match is not None, f"Could not check development environment, reason: {output}" groups = match.groups() installs, updates, removals = int(groups[0]), int(groups[1]), int(groups[2]) if installs > 0 or updates > 0: diff --git a/pyproject.toml b/pyproject.toml index 04b4512d2..cf56f361e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,24 +2,22 @@ requires = ["poetry-core>=1.7.0", "setuptools>=67.7.2"] build-backend = "poetry.core.masonry.api" -[tool.poetry] +[project] name = "kernel_tuner" -packages = [{ include = "kernel_tuner", from = "." }] description = "An easy to use CUDA/OpenCL kernel tuner in Python" -version = "1.1.3" # adhere to PEP440 versioning: https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#id55 +version = "1.3.0" # adhere to PEP440 versioning: https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#id55 +readme = "README.md" license = "Apache-2.0" authors = [ - "Ben van Werkhoven ", - "Alessio Sclocco ", - "Stijn Heldens ", - "Floris-Jan Willemsen ", - "Willem-Jan Palenstijn ", - "Bram Veenboer ", - "Richard Schoonhoven ", - "Leon Oostrum =1.26.0,<2.0.0)", # Python 3.12 requires numpy at least 1.26, CuPy does not support 2.0 + "scipy>=1.14.1", + "packaging", # required by file_utils + "jsonschema", + "python-constraint2>=2.2.2", + "xmltodict", + "pandas>=2.0.0", + "scikit-learn>=1.0.2", +] +# NOTE Torch can be used with Kernel Tuner, but is not a dependency, should be up to the user to use it + +[project.urls] homepage = "https://KernelTuner.github.io/kernel_tuner/" documentation = "https://KernelTuner.github.io/kernel_tuner/" 
repository = "https://github.com/KernelTuner/kernel_tuner" -[tool.poetry.urls] -"Tracker" = "https://github.com/KernelTuner/kernel_tuner/issues" -[tool.poetry.build] -generate-setup-file = false -[tool.poetry.scripts] -kernel_tuner = "kernel_tuner.interface:entry_point" +changelog = "https://github.com/KernelTuner/kernel_tuner/blob/master/CHANGELOG.md" +issues = "https://github.com/KernelTuner/kernel_tuner/issues" -[[tool.poetry.source]] -name = "testpypi" -url = "https://test.pypi.org/simple/" -priority = "explicit" +[project.scripts] +kernel_tuner = "kernel_tuner.interface:entry_point" -# ATTENTION: if anything is changed here, run `poetry update` -[tool.poetry.dependencies] -python = ">=3.9,<4" # <4 is because of hip-python # NOTE when changing the supported Python versions, also change the test versions in the noxfile -numpy = "^2.0.0" # Python 3.12 requires numpy at least 1.26 -scipy = ">=1.11.0" # held back by Python 3.9 -packaging = "*" # required by file_utils -jsonschema = "*" -python-constraint2 = "^2.1.0" -xmltodict = "*" -pandas = ">=2.0.0" -scikit-learn = ">=1.0.2" -# Torch can be used with Kernel Tuner, but is not a dependency, should be up to the user to use it +[tool.poetry] +packages = [{ include = "kernel_tuner", from = "." }] +include = [ + { path = "test" }, +] # this ensures that people won't have to clone the whole repo to include notebooks, they can just do `pip install kernel_tuner[tutorial,cuda]` # List of optional dependencies for user installation, e.g. `pip install kernel_tuner[cuda]`, used in the below `extras`. # Please note that this is different from the dependency groups below, e.g. `docs` and `test`, those are for development. 
# ATTENTION: if anything is changed here, run `poetry update` # CUDA -pycuda = { version = "^2024.1", optional = true } # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile -nvidia-ml-py = { version = "^12.535.108", optional = true } -pynvml = { version = "^11.4.1", optional = true } -# cupy-cuda11x = { version = "*", optional = true } # Note: these are completely optional dependencies as described in CONTRIBUTING.rst +# cupy-cuda11x = { version = "*", optional = true } # NOTE: these are completely optional dependencies as described in CONTRIBUTING.rst # cupy-cuda12x = { version = "*", optional = true } # cuda-python = { version = "*", optional = true } -# OpenCL -pyopencl = { version = "*", optional = true } # Attention: if pyopencl is changed here, also change `session.install("pyopencl")` in the Noxfile -# HIP -hip-python = { version = "^6.3.3.540.31", source = "testpypi", optional = true } # Note: when released, switch this package to pypi and remove tool.poetry.source -# Tutorial (for the notebooks used in the examples) -jupyter = { version = "^1.0.0", optional = true } -matplotlib = { version = "^3.5.0", optional = true } -[tool.poetry.extras] -cuda = ["pycuda", "nvidia-ml-py", "pynvml"] -opencl = ["pyopencl"] -cuda_opencl = ["pycuda", "pyopencl"] +[[tool.poetry.source]] +name = "testpypi" +url = "https://test.pypi.org/simple/" +priority = "explicit" + +[tool.poetry.dependencies] +hip-python = { version = "^6.3.3.540.31", source = "testpypi", optional = true } # Note: when released, switch this package to pypi and remove tool.poetry.source and move this to [project.optional-dependencies] + +[project.optional-dependencies] +cuda = ["pycuda>=2025.1", "nvidia-ml-py>=12.535.108", "pynvml>=11.4.1"] # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile +opencl = ["pyopencl"] # Attention: if pyopencl is changed here, also change `session.install("pyopencl")` in the Noxfile 
+cuda_opencl = ["pycuda>=2024.1", "pyopencl"] # Attention: if pycuda is changed here, also change `session.install("pycuda")` in the Noxfile hip = ["hip-python"] -tutorial = ["jupyter", "matplotlib", "nvidia-ml-py"] +tutorial = ["jupyter>=1.0.0", "matplotlib>=3.5.0", "nvidia-ml-py>=12.535.108"] # ATTENTION: if anything is changed here, run `poetry update` and `poetry export --with docs --without-hashes --format=requirements.txt --output doc/requirements.txt` # Please note that there is overlap with the `dev` group diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 000000000..1539a6cdf --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +def pytest_collection_modifyitems(items): + for item in items: + if item.get_closest_marker('timeout') is None: + item.add_marker(pytest.mark.timeout(60)) \ No newline at end of file diff --git a/test/context.py b/test/context.py index 016ee0af6..c31a64487 100644 --- a/test/context.py +++ b/test/context.py @@ -1,7 +1,6 @@ -import sys -import subprocess import shutil -import os +import subprocess +import sys import pytest @@ -48,7 +47,7 @@ try: import cuda - + print(cuda) cuda_present = True except Exception: cuda_present = False @@ -60,6 +59,26 @@ except (ImportError, RuntimeError): hip_present = False +try: + import botorch + import torch + bayes_opt_botorch_present = True +except ImportError: + bayes_opt_botorch_present = False + +try: + import gpytorch + import torch + bayes_opt_gpytorch_present = True +except ImportError: + bayes_opt_gpytorch_present = False + +try: + from autotuning_methodology.report_experiments import get_strategy_scores + methodology_present = True +except ImportError: + methodology_present = False + skip_if_no_pycuda = pytest.mark.skipif( not pycuda_present, reason="PyCuda not installed or no CUDA device detected" ) @@ -79,7 +98,10 @@ ) skip_if_no_openmp = pytest.mark.skipif(not openmp_present, reason="No OpenMP found") skip_if_no_openacc = pytest.mark.skipif(not 
openacc_present, reason="No nvc++ on PATH") -skip_if_no_hip = pytest.mark.skipif(not hip_present, reason="No HIP Python found or no HIP device detected") +skip_if_no_bayesopt_gpytorch = pytest.mark.skipif(not bayes_opt_gpytorch_present, reason="Torch and GPyTorch not installed") +skip_if_no_bayesopt_botorch = pytest.mark.skipif(not bayes_opt_botorch_present, reason="Torch and BOTorch not installed") +skip_if_no_pyhip = pytest.mark.skipif(not hip_present, reason="No HIP Python found") +skip_if_no_methodology = pytest.mark.skipif(not methodology_present, reason="Autotuning Methodology not found") def skip_backend(backend: str): diff --git a/test/convolution.cu b/test/convolution.cu new file mode 100644 index 000000000..ecafcf4b8 --- /dev/null +++ b/test/convolution.cu @@ -0,0 +1,166 @@ +#define image_height 4096 +#define image_width 4096 + +#ifndef filter_height + #define filter_height 17 +#endif +#ifndef filter_width + #define filter_width 17 +#endif + +#define border_height ((filter_height/2)*2) +#define border_width ((filter_width/2)*2) +#define input_height (image_height + border_height) +#define input_width (image_width + border_width) + +#ifndef block_size_x + #define block_size_x 16 +#endif +#ifndef block_size_y + #define block_size_y 16 +#endif +#ifndef block_size_z + #define block_size_z 1 +#endif +#ifndef tile_size_x + #define tile_size_x 1 +#endif +#ifndef tile_size_y + #define tile_size_y 1 +#endif + +#define i_end min(block_size_y*tile_size_y+border_height, input_height) +#define j_end min(block_size_x*tile_size_x+border_width, input_width) + +/* + * If requested, we can use the __ldg directive to load data through the + * read-only cache. 
+ */ +#define USE_READ_ONLY_CACHE read_only +#if USE_READ_ONLY_CACHE == 1 +#define LDG(x, y) __ldg(x+y) +#elif USE_READ_ONLY_CACHE == 0 +#define LDG(x, y) x[y] +#endif + +__constant__ float d_filter[33*33]; //large enough for the largest filter + +/* + * If use_padding == 1, we introduce (only when necessary) a number of padding + * columns in shared memory to avoid shared memory bank conflicts + * + * padding columns are only inserted when block_size_x is not a multiple of 32 (the assumed number of memory banks) + * and when the width of the data needed is not a multiple of 32. The latter is because some filter_widths never + * cause bank conflicts. + * + * If not passed as a tunable parameter, padding is on by default + */ +#define shared_mem_width (block_size_x*tile_size_x+border_width) +#ifndef use_padding + #define use_padding 1 +#endif +#if use_padding == 1 + #if (((block_size_x % 32)!=0) && (((shared_mem_width-block_size_x)%32) != 0)) + // next line uses &31 instead of %32, because % in C is remainder not modulo + #define padding_columns ((32 - (border_width + block_size_x*tile_size_x - block_size_x)) & 31) + #undef shared_mem_width + #define shared_mem_width (block_size_x*tile_size_x+border_width+padding_columns) + #endif +#endif + + +__global__ void convolution_kernel(float *output, float *input, float *filter) { + int ty = threadIdx.y; + int tx = threadIdx.x; + int by = blockIdx.y * block_size_y * tile_size_y; + int bx = blockIdx.x * block_size_x * tile_size_x; + + //shared memory to hold all input data need by this thread block + __shared__ float sh_input[block_size_y*tile_size_y+border_height][shared_mem_width]; + + //load all input data needed by this thread block into shared memory + #pragma unroll + for (int i=ty; i 0 @@ -57,13 +75,17 @@ def test_strategies(vector_add, strategy): unique_results = {} for result in results: x_int = ",".join([str(v) for k, v in result.items() if k in tune_params]) - if not isinstance(result["time"], util.InvalidConfig): 
+ if not isinstance(result["time"], InvalidConfig): unique_results[x_int] = result["time"] assert len(unique_results) <= filter_options["max_fevals"] # check whether the returned dictionaries contain exactly the expected keys and the appropriate type expected_items = { 'block_size_x': int, + 'test_string': str, + 'test_single': int, + 'test_bool': bool, + 'test_mixed': float, 'time': (float, int), 'times': list, 'compile_time': (float, int), diff --git a/test/test_T1_input.json b/test/test_T1_input.json new file mode 100644 index 000000000..7ae9aa218 --- /dev/null +++ b/test/test_T1_input.json @@ -0,0 +1,133 @@ +{ + "General": { + "BenchmarkName": "convolution", + "OutputFormat": "JSON" + }, + "ConfigurationSpace": { + "TuningParameters": [ + { + "Name": "filter_width", + "Type": "int", + "Values": "[i for i in range(3,35,2)]", + "Default": 17 + }, + { + "Name": "filter_height", + "Type": "int", + "Values": "[i for i in range(3,35,2)]", + "Default": 17 + }, + { + "Name": "block_size_x", + "Type": "int", + "Values": "[16*i for i in range(1,9)]", + "Default": 16 + }, + { + "Name": "block_size_y", + "Type": "int", + "Values": "[2**i for i in range(6)]", + "Default": 16 + }, + { + "Name": "tile_size_x", + "Type": "int", + "Values": "[i for i in range(1,9)]", + "Default": 1 + }, + { + "Name": "tile_size_y", + "Type": "int", + "Values": "[i for i in range(1,9)]", + "Default": 1 + }, + { + "Name": "use_padding", + "Type": "int", + "Values": "[0, 1]", + "Default": 1 + }, + { + "Name": "read_only", + "Type": "int", + "Values": "[0, 1]", + "Default": 0 + } + ], + "Conditions": [ + { + "Expression": "use_padding==0 or (block_size_x % 32 != 0)", + "Parameters": [ + "use_padding", + "block_size_x", + "block_size_y" + ] + } + ] + }, + "KernelSpecification": { + "Language": "CUDA", + "CompilerOptions": [ + "-std=c++11" + ], + "BenchmarkName": "GEMM", + "KernelName": "convolution_kernel", + "KernelFile": "convolution.cu", + "GlobalSizeType": "CUDA", + "LocalSize": { + "X": 
"block_size_x", + "Y": "block_size_y", + "Z": "1" + }, + "GlobalSize": { + "X": "(262144 // block_size_x) // tile_size_x", + "Y": "(262144 // block_size_y) // tile_size_y", + "Z": "1" + }, + "GridDivX": [ + "block_size_x", + "tile_size_x" + ], + "GridDivY": [ + "block_size_y", + "tile_size_y" + ], + "ProblemSize": [ + 4096, + 4096 + ], + "SharedMemory": 0, + "Stream": null, + "Arguments": [ + { + "Name": "output_image", + "Type": "float", + "MemoryType": "Vector", + "AccessType": "WriteOnly", + "FillType": "Constant", + "Size": 16777216, + "FillValue": 0.0, + "Output": 1 + }, + { + "Name": "input_image", + "Type": "float", + "MemoryType": "Vector", + "AccessType": "ReadOnly", + "FillType": "Random", + "Size": "(ProblemSize[0]+max(filter_width)-1) * (ProblemSize[1]+max(filter_height)-1)", + "FillValue": 1.0 + }, + { + "Name": "d_filter", + "Type": "float", + "MemoryType": "Vector", + "AccessType": "ReadOnly", + "MemType": "Constant", + "FillType": "Random", + "Size": "max(filter_height) * max(filter_width)", + "FillValue": 1.0 + } + ] + } +} \ No newline at end of file diff --git a/test/test_compiler_functions.py b/test/test_compiler_functions.py index fe30c0b58..85391d44a 100644 --- a/test/test_compiler_functions.py +++ b/test/test_compiler_functions.py @@ -1,21 +1,21 @@ -from datetime import datetime -import numpy as np import ctypes as C + +import numpy as np import pytest from pytest import raises try: - from mock import patch, Mock + from mock import Mock, patch except ImportError: - from unittest.mock import patch, Mock + from unittest.mock import Mock, patch import kernel_tuner -from kernel_tuner.backends.compiler import CompilerFunctions, Argument, is_cupy_array, get_array_module -from kernel_tuner.core import KernelSource, KernelInstance -from kernel_tuner import util +from kernel_tuner.backends.compiler import Argument, CompilerFunctions, get_array_module, is_cupy_array +from kernel_tuner.core import KernelInstance, KernelSource +from kernel_tuner.util 
import delete_temp_file -from .context import skip_if_no_gfortran, skip_if_no_gcc, skip_if_no_openmp, skip_if_no_cupy +from .context import skip_if_no_cupy, skip_if_no_gcc, skip_if_no_gfortran, skip_if_no_openmp from .test_runners import env as cuda_env # noqa: F401 @@ -272,7 +272,7 @@ def test_complies_fortran_function_with_module(): assert np.isclose(result, 42.0) finally: - util.delete_temp_file("my_fancy_module.mod") + delete_temp_file("my_fancy_module.mod") @pytest.fixture diff --git a/test/test_file_utils.py b/test/test_file_utils.py index 622e06b44..732282175 100644 --- a/test/test_file_utils.py +++ b/test/test_file_utils.py @@ -1,4 +1,5 @@ import json +from pathlib import Path import pytest import ctypes @@ -10,13 +11,19 @@ except: hip = None -from kernel_tuner.file_utils import output_file_schema, store_metadata_file, store_output_file +from kernel_tuner.file_utils import get_input_file, output_file_schema, store_metadata_file, store_output_file from kernel_tuner.util import delete_temp_file, check_argument_list -from .context import skip_if_no_hip +from .context import skip_if_no_pyhip from .test_runners import cache_filename, env, tune_kernel # noqa: F401 +def test_get_input_file(env): + filename = Path(__file__).parent / "test_T1_input.json" + assert filename.exists() + contents = get_input_file(filename, validate=True) + assert isinstance(contents, dict) + def test_store_output_file(env): # setup variables filename = "test_output_file.json" @@ -73,7 +80,7 @@ def hip_check(call_result): raise RuntimeError(str(err)) return result -@skip_if_no_hip +@skip_if_no_pyhip def test_check_argument_list_device_array(): """Test check_argument_list with DeviceArray""" float_kernel = """ diff --git a/test/test_hip_functions.py b/test/test_hip_functions.py index e192223ed..d82f76e97 100644 --- a/test/test_hip_functions.py +++ b/test/test_hip_functions.py @@ -6,7 +6,7 @@ from kernel_tuner.backends import hip as kt_hip from kernel_tuner.core import KernelInstance, 
KernelSource -from .context import skip_if_no_hip +from .context import skip_if_no_pyhip try: from hip import hip, hiprtc @@ -48,7 +48,7 @@ def env(): return ["vector_add", kernel_string, size, args, tune_params] -@skip_if_no_hip +@skip_if_no_pyhip def test_ready_argument_list(): size = 1000 a = np.int32(75) @@ -67,7 +67,7 @@ def test_ready_argument_list(): assert gpu_args[1].value == a assert gpu_args[3].value == c -@skip_if_no_hip +@skip_if_no_pyhip def test_compile(): kernel_string = """ __global__ void vector_add(float *c, float *a, float *b, int n) { @@ -87,7 +87,7 @@ def test_compile(): except Exception as e: pytest.fail("Did not expect any exception:" + str(e)) -@skip_if_no_hip +@skip_if_no_pyhip def test_memset_and_memcpy_dtoh(): a = [1, 2, 3, 4] x = np.array(a).astype(np.int8) @@ -101,7 +101,7 @@ def test_memset_and_memcpy_dtoh(): assert all(output == np.full(4, 4)) -@skip_if_no_hip +@skip_if_no_pyhip def test_memcpy_htod(): a = [1, 2, 3, 4] x = np.array(a).astype(np.float32) @@ -114,7 +114,7 @@ def test_memcpy_htod(): assert all(output == x) -@skip_if_no_hip +@skip_if_no_pyhip def test_copy_constant_memory_args(): kernel_string = """ __constant__ float my_constant_data[100]; @@ -147,7 +147,7 @@ def test_copy_constant_memory_args(): assert (my_constant_data == output).all() -@skip_if_no_hip +@skip_if_no_pyhip def test_smem_args(env): result, _ = tune_kernel(*env, smem_args=dict(size="block_size_x*4"), diff --git a/test/test_hyper.py b/test/test_hyper.py index 9d1dc55df..7863c2e47 100644 --- a/test/test_hyper.py +++ b/test/test_hyper.py @@ -1,8 +1,10 @@ from kernel_tuner.hyper import tune_hyper_params -from .test_runners import cache_filename, env # noqa: F401 +from .context import skip_if_no_methodology +from .test_runners import env # noqa: F401 +@skip_if_no_methodology def test_hyper(env): hyper_params = dict() @@ -13,6 +15,25 @@ def test_hyper(env): target_strategy = "genetic_algorithm" - result = tune_hyper_params(target_strategy, hyper_params, *env, 
verbose=True, cache=cache_filename) - assert len(result) > 0 + compiler_options = { + "gpus": ["A100", "MI250X"], + "override": { + "experimental_groups_defaults": { + "repeats": 1, + "samples": 1, + "minimum_fraction_of_budget_valid": 0.01, + }, + "statistics_settings": { + "cutoff_percentile": 0.90, + "cutoff_percentile_start": 0.01, + "cutoff_type": "time", + "objective_time_keys": [ + "all" + ] + } + } + } + result, env = tune_hyper_params(target_strategy, hyper_params, restrictions=[], iterations=1, compiler_options=compiler_options, verbose=True, cache=None) + assert len(result) == 2 + assert 'best_config' in env diff --git a/test/test_integration.py b/test/test_integration.py index aafb437f1..637a07575 100644 --- a/test/test_integration.py +++ b/test/test_integration.py @@ -5,7 +5,7 @@ import pytest from kernel_tuner import integration -from kernel_tuner import util +from kernel_tuner.util import delete_temp_file from datetime import datetime, timezone @@ -71,7 +71,7 @@ def test_store_results(fake_results): assert my_gpu_100_data[0]["time"] < 100 finally: - util.delete_temp_file(filename) + delete_temp_file(filename) def test_setup_device_targets(fake_results): @@ -136,8 +136,8 @@ def test_setup_device_targets(fake_results): assert expected in output_str finally: - util.delete_temp_file(results_filename) - util.delete_temp_file(header_filename) + delete_temp_file(results_filename) + delete_temp_file(header_filename) def test_setup_device_targets_max(fake_results): @@ -174,5 +174,5 @@ def test_setup_device_targets_max(fake_results): assert expected in output_str finally: - util.delete_temp_file(results_filename) - util.delete_temp_file(header_filename) + delete_temp_file(results_filename) + delete_temp_file(header_filename) diff --git a/test/test_kernelbuilder.py b/test/test_kernelbuilder.py index c706e3953..9cd2d0185 100644 --- a/test/test_kernelbuilder.py +++ b/test/test_kernelbuilder.py @@ -3,8 +3,8 @@ import pytest from kernel_tuner import kernelbuilder 
-from kernel_tuner import util from kernel_tuner import integration +from kernel_tuner.util import delete_temp_file backends = ["cuda", "cupy"] @@ -59,4 +59,4 @@ def test_PythonKernel_tuned(test_kernel, backend): assert np.allclose(reference[0], a+b) finally: - util.delete_temp_file(test_results_file) + delete_temp_file(test_results_file) diff --git a/test/test_observers.py b/test/test_observers.py index 97928b477..5f2242657 100644 --- a/test/test_observers.py +++ b/test/test_observers.py @@ -10,7 +10,7 @@ skip_if_no_cupy, skip_if_no_opencl, skip_if_no_pycuda, - skip_if_no_hip, + skip_if_no_pyhip, skip_if_no_pynvml, ) from .test_hip_functions import env as env_hip # noqa: F401 @@ -68,7 +68,7 @@ def test_register_observer_opencl(env_opencl): assert err.errisinstance(NotImplementedError) assert "OpenCL" in str(err.value) -@skip_if_no_hip +@skip_if_no_pyhip def test_register_observer_hip(env_hip): with raises(NotImplementedError) as err: kernel_tuner.tune_kernel(*env_hip, observers=[RegisterObserver()], lang='HIP') diff --git a/test/test_runners.py b/test/test_runners.py index 527c1d252..9aca7a328 100644 --- a/test/test_runners.py +++ b/test/test_runners.py @@ -140,6 +140,22 @@ def test_diff_evo(env): assert len(result) > 0 +def test_constraint_aware_GA(env): + options = dict(method="uniform", + constraint_aware=True, + popsize=5, + maxiter=2, + mutation_chance=10, + max_fevals=10) + result, _ = tune_kernel(*env, + strategy="genetic_algorithm", + strategy_options=options, + verbose=True, + cache=cache_filename, + simulation_mode=True) + assert len(result) > 0 + + @skip_if_no_pycuda def test_time_keeping(env): kernel_name, kernel_string, size, args, tune_params = env @@ -158,7 +174,7 @@ def test_time_keeping(env): answer=answer) max_time = (time.perf_counter() - start) * 1e3 # ms - assert len(result) >= 10 + assert len(result) >= 10, f"{len(result)=} < 10 for {kernel_name=} with {tune_params=}" timings = [ 'total_framework_time', 'total_strategy_time', 
'total_compile_time', diff --git a/test/test_searchspace.py b/test/test_searchspace.py index 8672c1d03..7b43fc722 100644 --- a/test/test_searchspace.py +++ b/test/test_searchspace.py @@ -27,6 +27,10 @@ simple_searchspace = Searchspace(simple_tune_params, restrict, max_threads) simple_searchspace_bruteforce = Searchspace(simple_tune_params, restrict, max_threads, framework="bruteforce") +simple_tune_params_single = simple_tune_params.copy() +simple_tune_params_single["s"] = [True] +simple_searchspace_single = Searchspace(simple_tune_params_single, restrict, max_threads) + # 3.1 million combinations, of which 10600 pass the restrictions num_layers = 42 tune_params = dict() diff --git a/test/test_time_budgets.py b/test/test_time_budgets.py new file mode 100644 index 000000000..acf10a0e9 --- /dev/null +++ b/test/test_time_budgets.py @@ -0,0 +1,77 @@ +from itertools import product +from time import perf_counter + +import numpy as np +import pytest +from pytest import raises + +from kernel_tuner import tune_kernel + +from .context import skip_if_no_gcc + + +@pytest.fixture +def env(): + kernel_name = "vector_add" + kernel_string = """ + #include + + float vector_add(float *c, float *a, float *b, int n) { + struct timespec start, end; + clock_gettime(CLOCK_MONOTONIC, &start); + + for (int i = 0; i < n; i++) { + c[i] = a[i] + b[i]; + } + + clock_gettime(CLOCK_MONOTONIC, &end); + double elapsed = (end.tv_sec - start.tv_sec) * 1e3 + (end.tv_nsec - start.tv_nsec) / 1e6; + return (float) elapsed; + }""" + + size = 100 + a = np.random.randn(size).astype(np.float32) + b = np.random.randn(size).astype(np.float32) + c = np.zeros_like(b) + n = np.int32(size) + + args = [c, a, b, n] + tune_params = {"nthreads": [1, 2, 4]} + + return kernel_name, kernel_string, size, args, tune_params + + +@skip_if_no_gcc +def test_no_time_budget(env): + """Ensure that a RuntimeError is raised if the startup takes longer than the time budget.""" + with raises(RuntimeError, match='startup time of the 
tuning process'): + tune_kernel(*env, strategy="random_sample", strategy_options={"strategy": "random_sample", "time_limit": 0.0}) + +@skip_if_no_gcc +def test_some_time_budget(env): + """Ensure that the time limit is respected.""" + time_limit = 1.0 + kernel_name, kernel_string, size, args, tune_params = env + tune_params["bogus"] = list(range(1000)) + env = kernel_name, kernel_string, size, args, tune_params + + # Ensure that if the tuning takes longer than the time budget, the results are returned early. + start_time = perf_counter() + res, _ = tune_kernel(*env, strategy="random_sample", strategy_options={"time_limit": time_limit}) + + # Ensure that there are at least some results, but not all. + size_all = len(list(product(*tune_params.values()))) + assert 0 < len(res) < size_all + + # Ensure that the time limit was respected by some margin. + assert perf_counter() - start_time < time_limit * 2 + +@skip_if_no_gcc +def test_full_time_budget(env): + """Ensure that given ample time budget, the entire space is explored.""" + res, _ = tune_kernel(*env, strategy="brute_force", strategy_options={"time_limit": 10.0}) + + # Ensure that the entire space is explored. 
+ tune_params = env[-1] + size_all = len(list(product(*tune_params.values()))) + assert len(res) == size_all diff --git a/tune_bo_conv.py b/tune_bo_conv.py new file mode 100644 index 000000000..ec37fbf67 --- /dev/null +++ b/tune_bo_conv.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python +from collections import OrderedDict +from pathlib import Path + +import numpy + +import kernel_tuner + +# file_path_results = "../last_run/_tune_configuration-results.json" +# file_path_metadata = "../last_run/_tune_configuration-metadata.json" + + +def ops(w, h, fw, fh): + return (w * h * fw * fh * 2) / 1e9 + + +unit = "GFLOP" +w = h = 4096 +fw = fh = 15 +inputs = [w, h, fw, fh] +total_flops = ops(w, h, fw, fh) + + +def tune( + device_name: str, + strategy="bayes_opt_BOTorch_transfer_weighted", + strategy_options={ 'max_fevals': 150 }, + verbose=True, + quiet=False, + simulation_mode=True, + lang="CUDA", + profiling=True, +): + directory = Path(__file__).parent / "../autotuning_methodology/cached_data_used/" + assert directory.exists() + if lang == "CUDA": + kernel_file = directory / "kernels/convolution_milo.cu" + elif lang == "HIP": + kernel_file = directory / "kernels/convolution_milo.cu.hip" + else: + raise ValueError(f"Invalid {lang=}") + + with kernel_file.open() as fp: + kernel_string = fp.read() + + # setup tunable parameters + tune_params = OrderedDict() + + # tune_params["pwr_limit"] = get_pwr_limit(pwr_limit, 0) + + image_width, image_height, filter_width, filter_height = inputs + + tune_params["block_size_x"] = [16 * i for i in range(1, 17)] + tune_params["block_size_y"] = [2**i for i in range(5)] + tune_params["tile_size_x"] = [i for i in range(1, 5)] + tune_params["tile_size_y"] = [i for i in range(1, 5)] + tune_params["read_only"] = [0, 1] # toggle using the read-only cache + + # do dry run + # tune_params["nvml_gr_clock"] = [2100] + # tune_params["block_size_x"] = [16] + # tune_params["block_size_y"] = [1] + # tune_params["tile_size_x"] = [1, 2, 4] + # 
tune_params["tile_size_y"] = [1] + # tune_params["read_only"] = [1] #toggle using the read-only cache + + tune_params["use_padding"] = [0, 1] # toggle the insertion of padding in shared memory + tune_params["use_shmem"] = [0, 1] + tune_params["use_cmem"] = [1] + tune_params["filter_height"] = [filter_height] + tune_params["filter_width"] = [filter_width] + + # limit the search to only use padding when its effective + restrict = [ + "use_padding==0 or block_size_x % 32 != 0", + "block_size_x*block_size_y<=1024", + "use_padding==0 or use_shmem != 0", + "use_shmem == 0 or (((block_size_x*tile_size_x+(filter_width-1)))*((block_size_y*tile_size_y+(filter_height-1)))) < 12*1024", + ] + + # print(restrict) + + problem_size = (image_width, image_height) + size = numpy.prod(problem_size) + largest_fh = filter_height + largest_fw = filter_width + input_size = (problem_size[0] + largest_fw - 1) * (problem_size[1] + largest_fh - 1) + + output_image = numpy.zeros(size).astype(numpy.float32) + input_image = numpy.random.randn(input_size).astype(numpy.float32) + filter_weights = numpy.random.randn(largest_fh * largest_fw).astype(numpy.float32) + + cmem_args = {"d_filter": filter_weights} + args = [output_image, input_image, filter_weights] + + grid_div_x = ["block_size_x", "tile_size_x"] + grid_div_y = ["block_size_y", "tile_size_y"] + + total_flops = ops(*inputs) + metrics = OrderedDict() + metrics["GFLOP/s"] = lambda p: total_flops / (p["time"] / 1000.0) + + cache_dir = directory / "cachefiles/convolution_milo" + cache_filename = f"{device_name}.json" + transfer_learning_caches = [p for p in cache_dir.iterdir() if not p.stem.endswith("_T4") and p.name != cache_filename] + + def run(): + return kernel_tuner.tune_kernel( + "convolution_kernel", + kernel_string, + problem_size, + args, + tune_params, + grid_div_y=grid_div_y, + grid_div_x=grid_div_x, + cmem_args=cmem_args, + restrictions=restrict, + cache=cache_dir / cache_filename, + metrics=metrics, + lang=lang, + iterations=32, 
+ device=0, + verbose=verbose, + quiet=quiet, + strategy=strategy, + strategy_options=strategy_options, + simulation_mode=simulation_mode, + transfer_learning_caches=transfer_learning_caches + ) + + # start tuning + if profiling: + import cProfile + + with cProfile.Profile() as pr: + results, env = run() + if profiling: + pr.dump_stats('bo_prof_tl2.prof') + else: + results, env = run() + + + # store_output_file(file_path_results, results, tune_params) + # store_metadata_file(file_path_metadata) + # print(results) + # print(env) + return results, env + + +if __name__ == "__main__": + # language = sys.argv[1] + # device_name = sys.argv[2] + language = "CUDA" + device_name = "A100" + + # if len(sys.argv) != 2: + # print("Usage: ./convolution.py [language ('HIP' or 'CUDA')] [device name]") + # exit(1) + + if language not in ("HIP", "CUDA"): + raise ValueError(f"{language} not valid, specify HIP or CUDA") + + tune(device_name=device_name, lang=language) diff --git a/tune_bo_dedisp.py b/tune_bo_dedisp.py new file mode 100644 index 000000000..78b4b0474 --- /dev/null +++ b/tune_bo_dedisp.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python +import os +from collections import OrderedDict +from pathlib import Path + +import kernel_tuner as kt + +nr_dms = 2048 +nr_samples = 25000 +nr_channels = 1536 +max_shift = 650 +nr_samples_per_channel = (nr_samples+max_shift) +down_sampling = 1 +dm_first = 0.0 +dm_step = 0.02 + +channel_bandwidth = 0.1953125 +sampling_time = 0.00004096 +min_freq = 1425.0 +max_freq = min_freq + (nr_channels-1) * channel_bandwidth + + +def tune(device_name, strategy="bayes_opt_BOTorch_transfer_weighted", strategy_options={ 'max_fevals': 1500 }, lang='HIP', verbose=True, quiet=False, simulation_mode=True, profiling=True): + + args = [] + + answer = [None, None, None] + + problem_size = (nr_samples, nr_dms, 1) + tune_params = OrderedDict() + tune_params["block_size_x"] = [1, 2, 4, 8] + [16*i for i in range(1,3)] + tune_params["block_size_y"] = [8*i for i in 
range(4,33)] + tune_params["block_size_z"] = [1] + tune_params["tile_size_x"] = [i for i in range(1,5)] + tune_params["tile_size_y"] = [i for i in range(1,9)] + tune_params["tile_stride_x"] = [0, 1] + tune_params["tile_stride_y"] = [0, 1] + tune_params["loop_unroll_factor_channel"] = [0] #+ [i for i in range(1,nr_channels+1) if nr_channels % i == 0] #[i for i in range(nr_channels+1)] + + cp = [f"-I{os.path.dirname(os.path.realpath(__file__))}"] + + + check_block_size = "32 <= block_size_x * block_size_y <= 1024" + check_loop_x = "loop_unroll_factor_x <= tile_size_x and tile_size_x % loop_unroll_factor_x == 0" + check_loop_y = "loop_unroll_factor_y <= tile_size_y and tile_size_y % loop_unroll_factor_y == 0" + check_loop_channel = f"loop_unroll_factor_channel <= {nr_channels} and loop_unroll_factor_channel and {nr_channels} % loop_unroll_factor_channel == 0" + + check_tile_stride_x = "tile_size_x > 1 or tile_stride_x == 0" + check_tile_stride_y = "tile_size_y > 1 or tile_stride_y == 0" + + config_valid = [check_block_size, check_tile_stride_x, check_tile_stride_y] + + metrics = OrderedDict() + gbytes = (nr_dms * nr_samples * nr_channels)/1e9 + metrics["GB/s"] = lambda p: gbytes / (p['time'] / 1e3) + + directory = Path(__file__).parent / "../autotuning_methodology/cached_data_used/" + cache_dir = directory / "cachefiles/dedispersion_milo" + cache_filename = f"{device_name}.json" + transfer_learning_caches = [p for p in cache_dir.iterdir() if not p.stem.endswith("_T4") and p.name != cache_filename] + + assert directory.exists() + if lang == "CUDA": + kernel_file = directory / "kernels/dedisp_milo/dedispersion.cu" + elif lang == "HIP": + kernel_file = directory / "kernels/dedisp_milo/dedispersion.cu.hip" + else: + raise ValueError(f"Invalid {lang=}") + + def run(): + return kt.tune_kernel("dedispersion_kernel", kernel_file, problem_size, args, tune_params, + answer=answer, compiler_options=cp, restrictions=config_valid, device=0, + cache=cache_dir / cache_filename, 
lang=lang, iterations=32, metrics=metrics, + simulation_mode=simulation_mode, verbose=verbose, quiet=quiet, strategy=strategy, + strategy_options=strategy_options, transfer_learning_caches=transfer_learning_caches) + + # start tuning + if profiling: + import cProfile + + with cProfile.Profile() as pr: + results, env = run() + if profiling: + pr.dump_stats('bo_prof_torchfit_2.prof') + else: + results, env = run() + + return results, env + +if __name__ == "__main__": + + tune("A100")