diff --git a/.gitignore b/.gitignore index b24029433..479c7188c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,7 @@ poetry.lock noxenv.txt noxsettings.toml -hyperparamtuning/ +hyperparamtuning*/* *.prof ### Python ### @@ -20,6 +20,7 @@ push_to_pypi.sh *.json !kernel_tuner/schema/T1/1.0.0/input-schema.json !test/test_T1_input.json +!test_cache_file*.json *.csv .cache *.ipynb_checkpoints @@ -27,6 +28,7 @@ examples/cuda/output deploy_key *.mod temp_*.* +.DS_Store .python-version .nox @@ -41,4 +43,4 @@ temp_*.* .LSOverride .vscode -.idea \ No newline at end of file +.idea diff --git a/doc/requirements_test.txt b/doc/requirements_test.txt index 8a5ac0b63..5a6db0db7 100644 --- a/doc/requirements_test.txt +++ b/doc/requirements_test.txt @@ -1,122 +1,137 @@ -argcomplete==3.6.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ - --hash=sha256:927531c2fbaa004979f18c2316f6ffadcfc5cc2de15ae2624dfe65deaf60e14f \ - --hash=sha256:cef54d7f752560570291214f0f1c48c3b8ef09aca63d65de7747612666725dbc -asttokens==3.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +--extra-index-url https://test.pypi.org/simple + +argcomplete==3.6.2 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:65b3133a29ad53fb42c48cf5114752c7ab66c1c38544fdf6460f450c09b42591 \ + --hash=sha256:d0519b1bc867f5f4f4713c41ad0aba73a4a5f007449716b16f385f2166dc6adf +asttokens==3.0.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7 \ --hash=sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2 -attrs==25.3.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +attrs==25.3.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3 \ 
--hash=sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b -build==1.2.2.post1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +build==1.2.2.post1 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:1d61c0887fa860c01971625baae8bdd338e517b836a2f70dd1f7aa3a6b2fc5b5 \ --hash=sha256:b36993e92ca9375a219c99e606a122ff365a760a2d4bba0caa09bd5278b608b7 -colorama==0.4.6 ; python_version >= "3.10" and python_version <= "3.11" and sys_platform == "win32" or python_version >= "3.10" and python_version <= "3.11" and os_name == "nt" or python_version >= "3.12" and python_version < "4" and sys_platform == "win32" or python_version >= "3.12" and python_version < "4" and os_name == "nt" \ +colorama==0.4.6 ; python_version >= "3.10" and python_version < "4" and (sys_platform == "win32" or os_name == "nt") \ --hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \ --hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6 -colorlog==6.9.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +colorlog==6.9.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ --hash=sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2 -coverage==7.7.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ - --hash=sha256:02fad4f8faa4153db76f9246bc95c1d99f054f4e0a884175bff9155cf4f856cb \ - --hash=sha256:092b134129a8bb940c08b2d9ceb4459af5fb3faea77888af63182e17d89e1cf1 \ - --hash=sha256:0ce92c5a9d7007d838456f4b77ea159cb628187a137e1895331e530973dcf862 \ - --hash=sha256:0dab4ef76d7b14f432057fdb7a0477e8bffca0ad39ace308be6e74864e632271 \ - --hash=sha256:1165490be0069e34e4f99d08e9c5209c463de11b471709dfae31e2a98cbd49fd \ - 
--hash=sha256:11dd6f52c2a7ce8bf0a5f3b6e4a8eb60e157ffedc3c4b4314a41c1dfbd26ce58 \ - --hash=sha256:15d54ecef1582b1d3ec6049b20d3c1a07d5e7f85335d8a3b617c9960b4f807e0 \ - --hash=sha256:171e9977c6a5d2b2be9efc7df1126fd525ce7cad0eb9904fe692da007ba90d81 \ - --hash=sha256:177d837339883c541f8524683e227adcaea581eca6bb33823a2a1fdae4c988e1 \ - --hash=sha256:18f544356bceef17cc55fcf859e5664f06946c1b68efcea6acdc50f8f6a6e776 \ - --hash=sha256:199a1272e642266b90c9f40dec7fd3d307b51bf639fa0d15980dc0b3246c1393 \ - --hash=sha256:1e6f867379fd033a0eeabb1be0cffa2bd660582b8b0c9478895c509d875a9d9e \ - --hash=sha256:2444fbe1ba1889e0b29eb4d11931afa88f92dc507b7248f45be372775b3cef4f \ - --hash=sha256:25fe40967717bad0ce628a0223f08a10d54c9d739e88c9cbb0f77b5959367542 \ - --hash=sha256:264ff2bcce27a7f455b64ac0dfe097680b65d9a1a293ef902675fa8158d20b24 \ - --hash=sha256:2a79c4a09765d18311c35975ad2eb1ac613c0401afdd9cb1ca4110aeb5dd3c4c \ - --hash=sha256:2c492401bdb3a85824669d6a03f57b3dfadef0941b8541f035f83bbfc39d4282 \ - --hash=sha256:315ff74b585110ac3b7ab631e89e769d294f303c6d21302a816b3554ed4c81af \ - --hash=sha256:34a3bf6b92e6621fc4dcdaab353e173ccb0ca9e4bfbcf7e49a0134c86c9cd303 \ - --hash=sha256:37351dc8123c154fa05b7579fdb126b9f8b1cf42fd6f79ddf19121b7bdd4aa04 \ - --hash=sha256:385618003e3d608001676bb35dc67ae3ad44c75c0395d8de5780af7bb35be6b2 \ - --hash=sha256:392cc8fd2b1b010ca36840735e2a526fcbd76795a5d44006065e79868cc76ccf \ - --hash=sha256:3d03287eb03186256999539d98818c425c33546ab4901028c8fa933b62c35c3a \ - --hash=sha256:44683f2556a56c9a6e673b583763096b8efbd2df022b02995609cf8e64fc8ae0 \ - --hash=sha256:44af11c00fd3b19b8809487630f8a0039130d32363239dfd15238e6d37e41a48 \ - --hash=sha256:452735fafe8ff5918236d5fe1feac322b359e57692269c75151f9b4ee4b7e1bc \ - --hash=sha256:4c181ceba2e6808ede1e964f7bdc77bd8c7eb62f202c63a48cc541e5ffffccb6 \ - --hash=sha256:4dd532dac197d68c478480edde74fd4476c6823355987fd31d01ad9aa1e5fb59 \ - --hash=sha256:520af84febb6bb54453e7fbb730afa58c7178fd018c398a8fcd8e269a79bf96d \ - 
--hash=sha256:553ba93f8e3c70e1b0031e4dfea36aba4e2b51fe5770db35e99af8dc5c5a9dfe \ - --hash=sha256:5b7b02e50d54be6114cc4f6a3222fec83164f7c42772ba03b520138859b5fde1 \ - --hash=sha256:63306486fcb5a827449464f6211d2991f01dfa2965976018c9bab9d5e45a35c8 \ - --hash=sha256:75c82b27c56478d5e1391f2e7b2e7f588d093157fa40d53fd9453a471b1191f2 \ - --hash=sha256:7ba5ff236c87a7b7aa1441a216caf44baee14cbfbd2256d306f926d16b026578 \ - --hash=sha256:7e688010581dbac9cab72800e9076e16f7cccd0d89af5785b70daa11174e94de \ - --hash=sha256:80b5b207a8b08c6a934b214e364cab2fa82663d4af18981a6c0a9e95f8df7602 \ - --hash=sha256:822fa99dd1ac686061e1219b67868e25d9757989cf2259f735a4802497d6da31 \ - --hash=sha256:881cae0f9cbd928c9c001487bb3dcbfd0b0af3ef53ae92180878591053be0cb3 \ - --hash=sha256:88d96127ae01ff571d465d4b0be25c123789cef88ba0879194d673fdea52f54e \ - --hash=sha256:8b1c65a739447c5ddce5b96c0a388fd82e4bbdff7251396a70182b1d83631019 \ - --hash=sha256:8fed429c26b99641dc1f3a79179860122b22745dd9af36f29b141e178925070a \ - --hash=sha256:9bb47cc9f07a59a451361a850cb06d20633e77a9118d05fd0f77b1864439461b \ - --hash=sha256:a6b6b3bd121ee2ec4bd35039319f3423d0be282b9752a5ae9f18724bc93ebe7c \ - --hash=sha256:ae13ed5bf5542d7d4a0a42ff5160e07e84adc44eda65ddaa635c484ff8e55917 \ - --hash=sha256:af94fb80e4f159f4d93fb411800448ad87b6039b0500849a403b73a0d36bb5ae \ - --hash=sha256:b4c144c129343416a49378e05c9451c34aae5ccf00221e4fa4f487db0816ee2f \ - --hash=sha256:b52edb940d087e2a96e73c1523284a2e94a4e66fa2ea1e2e64dddc67173bad94 \ - --hash=sha256:b559adc22486937786731dac69e57296cb9aede7e2687dfc0d2696dbd3b1eb6b \ - --hash=sha256:b838a91e84e1773c3436f6cc6996e000ed3ca5721799e7789be18830fad009a2 \ - --hash=sha256:ba8480ebe401c2f094d10a8c4209b800a9b77215b6c796d16b6ecdf665048950 \ - --hash=sha256:bc96441c9d9ca12a790b5ae17d2fa6654da4b3962ea15e0eabb1b1caed094777 \ - --hash=sha256:c90e9141e9221dd6fbc16a2727a5703c19443a8d9bf7d634c792fa0287cee1ab \ - --hash=sha256:d2e73e2ac468536197e6b3ab79bc4a5c9da0f078cd78cfcc7fe27cf5d1195ef0 \ - 
--hash=sha256:d3154b369141c3169b8133973ac00f63fcf8d6dbcc297d788d36afbb7811e511 \ - --hash=sha256:d66ff48ab3bb6f762a153e29c0fc1eb5a62a260217bc64470d7ba602f5886d20 \ - --hash=sha256:d6874929d624d3a670f676efafbbc747f519a6121b581dd41d012109e70a5ebd \ - --hash=sha256:e33426a5e1dc7743dd54dfd11d3a6c02c5d127abfaa2edd80a6e352b58347d1a \ - --hash=sha256:e52eb31ae3afacdacfe50705a15b75ded67935770c460d88c215a9c0c40d0e9c \ - --hash=sha256:eae79f8e3501133aa0e220bbc29573910d096795882a70e6f6e6637b09522133 \ - --hash=sha256:eebd927b86761a7068a06d3699fd6c20129becf15bb44282db085921ea0f1585 \ - --hash=sha256:eff187177d8016ff6addf789dcc421c3db0d014e4946c1cc3fbf697f7852459d \ - --hash=sha256:f5f99a93cecf799738e211f9746dc83749b5693538fbfac279a61682ba309387 \ - --hash=sha256:fbba59022e7c20124d2f520842b75904c7b9f16c854233fa46575c69949fb5b9 -decorator==5.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +coverage==7.8.0 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:042e7841a26498fff7a37d6fda770d17519982f5b7d8bf5278d140b67b61095f \ + --hash=sha256:04bfec25a8ef1c5f41f5e7e5c842f6b615599ca8ba8391ec33a9290d9d2db3a3 \ + --hash=sha256:0915742f4c82208ebf47a2b154a5334155ed9ef9fe6190674b8a46c2fb89cb05 \ + --hash=sha256:18c5ae6d061ad5b3e7eef4363fb27a0576012a7447af48be6c75b88494c6cf25 \ + --hash=sha256:2931f66991175369859b5fd58529cd4b73582461877ecfd859b6549869287ffe \ + --hash=sha256:2e4b6b87bb0c846a9315e3ab4be2d52fac905100565f4b92f02c445c8799e257 \ + --hash=sha256:3043ba1c88b2139126fc72cb48574b90e2e0546d4c78b5299317f61b7f718b78 \ + --hash=sha256:379fe315e206b14e21db5240f89dc0774bdd3e25c3c58c2c733c99eca96f1ada \ + --hash=sha256:42421e04069fb2cbcbca5a696c4050b84a43b05392679d4068acbe65449b5c64 \ + --hash=sha256:4dfd9a93db9e78666d178d4f08a5408aa3f2474ad4d0e0378ed5f2ef71640cb6 \ + --hash=sha256:52a523153c568d2c0ef8826f6cc23031dc86cffb8c6aeab92c4ff776e7951b28 \ + 
--hash=sha256:554fec1199d93ab30adaa751db68acec2b41c5602ac944bb19187cb9a41a8067 \ + --hash=sha256:581a40c7b94921fffd6457ffe532259813fc68eb2bdda60fa8cc343414ce3733 \ + --hash=sha256:5a26c0c795c3e0b63ec7da6efded5f0bc856d7c0b24b2ac84b4d1d7bc578d676 \ + --hash=sha256:5a570cd9bd20b85d1a0d7b009aaf6c110b52b5755c17be6962f8ccd65d1dbd23 \ + --hash=sha256:5aaeb00761f985007b38cf463b1d160a14a22c34eb3f6a39d9ad6fc27cb73008 \ + --hash=sha256:5ac46d0c2dd5820ce93943a501ac5f6548ea81594777ca585bf002aa8854cacd \ + --hash=sha256:5c8a5c139aae4c35cbd7cadca1df02ea8cf28a911534fc1b0456acb0b14234f3 \ + --hash=sha256:6b8af63b9afa1031c0ef05b217faa598f3069148eeee6bb24b79da9012423b82 \ + --hash=sha256:769773614e676f9d8e8a0980dd7740f09a6ea386d0f383db6821df07d0f08545 \ + --hash=sha256:771eb7587a0563ca5bb6f622b9ed7f9d07bd08900f7589b4febff05f469bea00 \ + --hash=sha256:77af0f6447a582fdc7de5e06fa3757a3ef87769fbb0fdbdeba78c23049140a47 \ + --hash=sha256:7a3d62b3b03b4b6fd41a085f3574874cf946cb4604d2b4d3e8dca8cd570ca501 \ + --hash=sha256:821f7bcbaa84318287115d54becb1915eece6918136c6f91045bb84e2f88739d \ + --hash=sha256:89b1f4af0d4afe495cd4787a68e00f30f1d15939f550e869de90a86efa7e0814 \ + --hash=sha256:8a1d96e780bdb2d0cbb297325711701f7c0b6f89199a57f2049e90064c29f6bd \ + --hash=sha256:8a40fcf208e021eb14b0fac6bdb045c0e0cab53105f93ba0d03fd934c956143a \ + --hash=sha256:8f99eb72bf27cbb167b636eb1726f590c00e1ad375002230607a844d9e9a2318 \ + --hash=sha256:90e7fbc6216ecaffa5a880cdc9c77b7418c1dcb166166b78dbc630d07f278cc3 \ + --hash=sha256:94ec0be97723ae72d63d3aa41961a0b9a6f5a53ff599813c324548d18e3b9e8c \ + --hash=sha256:95aa6ae391a22bbbce1b77ddac846c98c5473de0372ba5c463480043a07bff42 \ + --hash=sha256:96121edfa4c2dfdda409877ea8608dd01de816a4dc4a0523356067b305e4e17a \ + --hash=sha256:a1f406a8e0995d654b2ad87c62caf6befa767885301f3b8f6f73e6f3c31ec3a6 \ + --hash=sha256:a321c61477ff8ee705b8a5fed370b5710c56b3a52d17b983d9215861e37b642a \ + --hash=sha256:a5761c70c017c1b0d21b0815a920ffb94a670c8d5d409d9b38857874c21f70d7 \ + 
--hash=sha256:a9abbccd778d98e9c7e85038e35e91e67f5b520776781d9a1e2ee9d400869487 \ + --hash=sha256:ad80e6b4a0c3cb6f10f29ae4c60e991f424e6b14219d46f1e7d442b938ee68a4 \ + --hash=sha256:b44674870709017e4b4036e3d0d6c17f06a0e6d4436422e0ad29b882c40697d2 \ + --hash=sha256:b571bf5341ba8c6bc02e0baeaf3b061ab993bf372d982ae509807e7f112554e9 \ + --hash=sha256:b8194fb8e50d556d5849753de991d390c5a1edeeba50f68e3a9253fbd8bf8ccd \ + --hash=sha256:b87eb6fc9e1bb8f98892a2458781348fa37e6925f35bb6ceb9d4afd54ba36c73 \ + --hash=sha256:bbb5cc845a0292e0c520656d19d7ce40e18d0e19b22cb3e0409135a575bf79fc \ + --hash=sha256:be945402e03de47ba1872cd5236395e0f4ad635526185a930735f66710e1bd3f \ + --hash=sha256:bf13d564d310c156d1c8e53877baf2993fb3073b2fc9f69790ca6a732eb4bfea \ + --hash=sha256:cf60dd2696b457b710dd40bf17ad269d5f5457b96442f7f85722bdb16fa6c899 \ + --hash=sha256:d1ba00ae33be84066cfbe7361d4e04dec78445b2b88bdb734d0d1cbab916025a \ + --hash=sha256:d39fc4817fd67b3915256af5dda75fd4ee10621a3d484524487e33416c6f3543 \ + --hash=sha256:d766a4f0e5aa1ba056ec3496243150698dc0481902e2b8559314368717be82b1 \ + --hash=sha256:dbf364b4c5e7bae9250528167dfe40219b62e2d573c854d74be213e1e52069f7 \ + --hash=sha256:dd19608788b50eed889e13a5d71d832edc34fc9dfce606f66e8f9f917eef910d \ + --hash=sha256:e013b07ba1c748dacc2a80e69a46286ff145935f260eb8c72df7185bf048f502 \ + --hash=sha256:e5d2b9be5b0693cf21eb4ce0ec8d211efb43966f6657807f6859aab3814f946b \ + --hash=sha256:e5ff52d790c7e1628241ffbcaeb33e07d14b007b6eb00a19320c7b8a7024c040 \ + --hash=sha256:e75a2ad7b647fd8046d58c3132d7eaf31b12d8a53c0e4b21fa9c4d23d6ee6d3c \ + --hash=sha256:e7ac22a0bb2c7c49f441f7a6d46c9c80d96e56f5a8bc6972529ed43c8b694e27 \ + --hash=sha256:ed2144b8a78f9d94d9515963ed273d620e07846acd5d4b0a642d4849e8d91a0c \ + --hash=sha256:f017a61399f13aa6d1039f75cd467be388d157cd81f1a119b9d9a68ba6f2830d \ + --hash=sha256:f1d8a2a57b47142b10374902777e798784abf400a004b14f1b0b9eaf1e528ba4 \ + --hash=sha256:f2d32f95922927186c6dbc8bc60df0d186b6edb828d299ab10898ef3f40052fe \ + 
--hash=sha256:f319bae0321bc838e205bf9e5bc28f0a3165f30c203b610f17ab5552cff90323 \ + --hash=sha256:f3c38e4e5ccbdc9198aecc766cedbb134b2d89bf64533973678dfcf07effd883 \ + --hash=sha256:f9983d01d7705b2d1f7a95e10bbe4091fabc03a46881a256c2787637b087003f \ + --hash=sha256:fa260de59dfb143af06dcf30c2be0b200bed2a73737a8a59248fcb9fa601ef0f +decorator==5.2.1 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360 \ --hash=sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a -distlib==0.3.9 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +distlib==0.3.9 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 -exceptiongroup==1.2.2 ; python_version >= "3.10" and python_version < "3.11" \ +exceptiongroup==1.2.2 ; python_version == "3.10" \ --hash=sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b \ --hash=sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc -executing==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +executing==2.2.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa \ --hash=sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755 -filelock==3.18.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +filelock==3.18.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2 \ --hash=sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de +hip-python==6.3.3.540.31 ; python_version >= "3.10" and 
python_version < "4" \ + --hash=sha256:254aba3a63a5f8910606539e59de710845515e0c2fb2bdcbbc2dc4c18754dfbc \ + --hash=sha256:5c0da34b3872d31f8bf2f857bcdedfb84f9740c3fd3e1ace026fde65219815d8 \ + --hash=sha256:6ce1b96f63f3427b4433428505c3c2d7b3959a253751decfe1ccdc05407d8c0d \ + --hash=sha256:77b54fa241ddd565ad4d39f927633c75a69708762987f77392657692b247b24d \ + --hash=sha256:7f9dbfbda4baf7e11a3eb3ddf728622880c79f3c525c68161566c9e471da4f4b \ + --hash=sha256:994ef7f5b04be6c9544ef943eeac7cc282fc9583f621bfd615d18b7a812ef9ce importlib-metadata==8.6.1 ; python_version >= "3.10" and python_full_version < "3.10.2" \ --hash=sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e \ --hash=sha256:310b41d755445d74569f993ccfc22838295d9fe005425094fad953d7f15c8580 -iniconfig==2.1.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +iniconfig==2.1.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7 \ --hash=sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760 -ipython==8.34.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +ipython-pygments-lexers==1.1.1 ; python_version >= "3.11" and python_version < "4" \ + --hash=sha256:09c0138009e56b6854f9535736f4171d855c8c08a563a0dcd8022f78355c7e81 \ + --hash=sha256:a9462224a505ade19a605f71f8fa63c2048833ce50abc86768a0d81d876dc81c +ipython==8.34.0 ; python_version == "3.10" \ --hash=sha256:0419883fa46e0baa182c5d50ebb8d6b49df1889fdb70750ad6d8cfe678eda6e3 \ --hash=sha256:c31d658e754673ecc6514583e7dda8069e47136eb62458816b7d1e6625948b5a -jedi==0.19.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +ipython==9.0.2 ; python_version >= "3.11" and python_version < "4" \ + --hash=sha256:143ef3ea6fb1e1bffb4c74b114051de653ffb7737a3f7ab1670e657ca6ae8c44 \ + 
--hash=sha256:ec7b479e3e5656bf4f58c652c120494df1820f4f28f522fb7ca09e213c2aab52 +jedi==0.19.2 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0 \ --hash=sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9 -joblib==1.4.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +joblib==1.4.2 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6 \ --hash=sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e -jsonschema-specifications==2024.10.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +jsonschema-specifications==2024.10.1 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:0f38b83639958ce1152d02a7f062902c41c8fd20d558b0c34344292d417ae272 \ --hash=sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf -jsonschema==4.23.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +jsonschema==4.23.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4 \ --hash=sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566 -markupsafe==2.1.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +markupsafe==2.1.5 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf \ --hash=sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff \ --hash=sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f \ @@ -177,19 +192,19 @@ markupsafe==2.1.5 ; python_version >= "3.10" and python_version <= "3.11" or pyt 
--hash=sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab \ --hash=sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd \ --hash=sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68 -matplotlib-inline==0.1.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +matplotlib-inline==0.1.7 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90 \ --hash=sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca -mock==5.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +mock==5.2.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:4e460e818629b4b173f32d08bf30d3af8123afbb8e04bb5707a1fd4799e503f0 \ --hash=sha256:7ba87f72ca0e915175596069dbbcc7c75af7b5e9b9bc107ad6349ede0819982f -nox-poetry==1.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +nox-poetry==1.2.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:2531a404e3a21eb73fc1a587a548506a8e2c4c1e6e7ef0c1d0d8d6453b7e5d26 \ --hash=sha256:266eea7a0ab3cad7f4121ecc05b76945036db3b67e6e347557f05010a18e2682 -nox==2024.10.9 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +nox==2024.10.9 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 -numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +numpy==1.26.4 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b \ 
--hash=sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818 \ --hash=sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20 \ @@ -226,10 +241,10 @@ numpy==1.26.4 ; python_version >= "3.10" and python_version <= "3.11" or python_ --hash=sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef \ --hash=sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3 \ --hash=sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f -packaging==24.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +packaging==24.2 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f -pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +pandas==2.2.3 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a \ --hash=sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d \ --hash=sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5 \ @@ -272,176 +287,187 @@ pandas==2.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_ --hash=sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015 \ --hash=sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24 \ --hash=sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319 -parso==0.8.4 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +parso==0.8.4 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18 \ 
--hash=sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d -pep440==0.1.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +pep440==0.1.2 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:36d6ad73f2b5d07769294cafe183500ac89d848c922a3d3f521b968481880d51 \ --hash=sha256:58b37246cc2b13fee1ca2a3c092cb3704d21ecf621a5bdbb168e44e697f6d04d -pexpect==4.9.0 ; python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "4" and (sys_platform != "win32" and sys_platform != "emscripten") \ +pexpect==4.9.0 ; python_version >= "3.10" and python_version < "4" and sys_platform != "win32" and sys_platform != "emscripten" \ --hash=sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523 \ --hash=sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f -platformdirs==4.3.7 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +platformdirs==4.3.7 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:a03875334331946f13c549dbd8f4bac7a13a50a895a0eb1e8c6a8ace80d40a94 \ --hash=sha256:eb437d586b6a0986388f0d6f74aa0cde27b48d0e3d66843640bfb6bdcdb6e351 -pluggy==1.5.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +pluggy==1.5.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \ --hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 -prompt-toolkit==3.0.50 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +prompt-toolkit==3.0.50 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:544748f3860a2623ca5cd6d2795e7a14f3d0e1c3c9728359013f79877fc89bab \ 
--hash=sha256:9b6427eb19e479d98acff65196a307c555eb567989e6d88ebbb1b509d9779198 -ptyprocess==0.7.0 ; python_version >= "3.10" and python_version <= "3.11" and os_name != "nt" or python_version >= "3.10" and python_version <= "3.11" and (sys_platform != "win32" and sys_platform != "emscripten") or python_version >= "3.12" and python_version < "4" and os_name != "nt" or python_version >= "3.12" and python_version < "4" and (sys_platform != "win32" and sys_platform != "emscripten") \ +ptyprocess==0.7.0 ; python_version >= "3.10" and python_version < "4" and (os_name != "nt" or sys_platform != "win32" and sys_platform != "emscripten") \ --hash=sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35 \ --hash=sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220 -pure-eval==0.2.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +pure-eval==0.2.3 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0 \ --hash=sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42 -pygments==2.19.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +pygments==2.19.1 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f \ --hash=sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c -pyproject-hooks==1.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +pyproject-hooks==1.2.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:1e859bd5c40fae9448642dd871adf459e5e2084186e8d2c2a79a824c970da1f8 \ --hash=sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913 -pytest-cov==5.0.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" 
and python_version < "4" \ +pytest-cov==5.0.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652 \ --hash=sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857 -pytest-timeout==2.3.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +pytest-timeout==2.3.1 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:12397729125c6ecbdaca01035b9e5239d4db97352320af155b3f5de1ba5165d9 \ --hash=sha256:68188cb703edfc6a18fad98dc25a3c61e9f24d644b0b70f33af545219fc7813e -pytest==8.3.5 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +pytest==8.3.5 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820 \ --hash=sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845 -python-constraint2==2.2.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ - --hash=sha256:0a841b088076d9dc481989359076b439d5201126583d920173ed9ab9cf7c4771 \ - --hash=sha256:0f0acfbae77ef7fcbff25d1c46b2360e0c486667c1a595b5c7cd4a6540cad5e6 \ - --hash=sha256:203b740a78266123e36d88215bb232e5e682c5845b674d2d5b1218fb3394ff1f \ - --hash=sha256:298c322c157ae6f5a9a9b8de3d08eefcdfed7b78e4abb2ddffe1bd345ed7997b \ - --hash=sha256:348ee17de0de028b68bf8050af142adfae37b500e60ac6758dc499bc19712805 \ - --hash=sha256:46cb1946fc7fb63262c43d4366f8cfceb551fb7a2bf10f275ac236d968746e02 \ - --hash=sha256:48c4f8ca1573f08bb6ef900cbe2e642aa6afb77c11a1f7c9d42c054fcfd93b8b \ - --hash=sha256:7bf723afbfdd13155f38d1344b015fd962818fdf70cdf39005a6a5bf810e5001 \ - --hash=sha256:85ea5330b12ccb4a474c89e3fdd037c5173db0216985da0e9a5bc20f6e26d0ca \ - --hash=sha256:8a39fecbb893137814a4f0ce82fd78df68789d658c6991bb6d57d773a6f8878d \ - 
--hash=sha256:aae18d318fd5150cda3befcf40b178a8dc661abb79cf663fefb7edd6e3afd6ab \ - --hash=sha256:b4d6159d05204cddfa4e46eef24a10f1d6aed41a905ca83314f5d1caa31599ab \ - --hash=sha256:c337839cfb0b3559f2f211e2ae67993c7187abf5dddbc5b587fe26b7c1b5d0b0 \ - --hash=sha256:c3b887f073f59cf5151df3cd25c2142016676da9034d5af56478c735526882d3 \ - --hash=sha256:d060b179461f09ee6571222ee63b4ac8dafdb6a41ffa75296a2f6b07a6bc500e \ - --hash=sha256:f1590a5699e1097f0057513e64bac4ac2d11f5848467c1c27967e1217f8bec3d -python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +python-constraint2==2.2.2 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:02dcdf6d6f2d403b6304dddb242ef1b3db791600c7b8f8cd895dc3f87509bc6e \ + --hash=sha256:0951ff7ee0d549037ed078ecf828f33003730531a7231f9773c3674553362efa \ + --hash=sha256:21909f3e0dc12448eb4a317f109fb01bc766fb8db20cebc84493ed63f3db8670 \ + --hash=sha256:26af50fda2f0eecea191b9ec5203ededdeda4e16bc5987d0362997dbac01cf9a \ + --hash=sha256:26d5360d7b8563c2b2e25ad11398a5ed5fff346ad3c8be4ee9869fb52c4f921b \ + --hash=sha256:339ee2b5efc8ba19d5a789e6fea848c1d053bea52a7064bfff2c1414a8de46f2 \ + --hash=sha256:5fd97d5b23cb5cc1b18e223745d16d4dcaccdc6104aeb537609ce9aadd1a65da \ + --hash=sha256:90ba6f209b8e91d6e53390af37bcbde11d2b2e38bd7383b7934a7301e5d36775 \ + --hash=sha256:94de42fc08dfb4fcef800d1d974609a25a842983543f96f97440ab2694c8763a \ + --hash=sha256:b3dc9e4ff47941f9ca74789bfcc3340c55804b492a71f6bc532c7a52b739f2b4 \ + --hash=sha256:b4be72425c887537f224c45118713cc49f996f41e7e684b74a07379ac07dd7a5 \ + --hash=sha256:b60067157956dca4fee0ae619b2eadd3e79cfceb30843cab2e0e07b0d294759e \ + --hash=sha256:b9d13b56d65984f752a6300f737d7907993b8248179cc389a2f8f6ebe24b8ec9 \ + --hash=sha256:bdd4b448c4dcaee76b649ee225ddfc03b613b2dbf611847b346d15f7823ab1e2 \ + --hash=sha256:c6ac87e3d0953218edbcf1f9c4aa9b59aca83aa383f0cc4f0bb2343e39253026 \ + 
--hash=sha256:e0064a8d4cc88161cd2378cf102fe3453503f116ab6e4932c5f74108aba072ee +python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \ --hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427 -pytz==2025.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +pytz==2025.2 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3 \ --hash=sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00 -referencing==0.36.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +referencing==0.36.2 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa \ --hash=sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0 -rpds-py==0.23.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ - --hash=sha256:09cd7dbcb673eb60518231e02874df66ec1296c01a4fcd733875755c02014b19 \ - --hash=sha256:0f3288930b947cbebe767f84cf618d2cbe0b13be476e749da0e6a009f986248c \ - --hash=sha256:0fced9fd4a07a1ded1bac7e961ddd9753dd5d8b755ba8e05acba54a21f5f1522 \ - --hash=sha256:112b8774b0b4ee22368fec42749b94366bd9b536f8f74c3d4175d4395f5cbd31 \ - --hash=sha256:11dd60b2ffddba85715d8a66bb39b95ddbe389ad2cfcf42c833f1bcde0878eaf \ - --hash=sha256:178f8a60fc24511c0eb756af741c476b87b610dba83270fce1e5a430204566a4 \ - --hash=sha256:1b08027489ba8fedde72ddd233a5ea411b85a6ed78175f40285bd401bde7466d \ - --hash=sha256:1bf5be5ba34e19be579ae873da515a2836a2166d8d7ee43be6ff909eda42b72b \ - --hash=sha256:1ed7de3c86721b4e83ac440751329ec6a1102229aa18163f84c75b06b525ad7e \ - --hash=sha256:1eedaaccc9bb66581d4ae7c50e15856e335e57ef2734dbc5fd8ba3e2a4ab3cb6 
\ - --hash=sha256:243241c95174b5fb7204c04595852fe3943cc41f47aa14c3828bc18cd9d3b2d6 \ - --hash=sha256:26bb3e8de93443d55e2e748e9fd87deb5f8075ca7bc0502cfc8be8687d69a2ec \ - --hash=sha256:271fa2184cf28bdded86bb6217c8e08d3a169fe0bbe9be5e8d96e8476b707122 \ - --hash=sha256:28358c54fffadf0ae893f6c1050e8f8853e45df22483b7fff2f6ab6152f5d8bf \ - --hash=sha256:285019078537949cecd0190f3690a0b0125ff743d6a53dfeb7a4e6787af154f5 \ - --hash=sha256:2893d778d4671ee627bac4037a075168b2673c57186fb1a57e993465dbd79a93 \ - --hash=sha256:2a54027554ce9b129fc3d633c92fa33b30de9f08bc61b32c053dc9b537266fed \ - --hash=sha256:2c6ae11e6e93728d86aafc51ced98b1658a0080a7dd9417d24bfb955bb09c3c2 \ - --hash=sha256:2cfa07c346a7ad07019c33fb9a63cf3acb1f5363c33bc73014e20d9fe8b01cdd \ - --hash=sha256:35d5631ce0af26318dba0ae0ac941c534453e42f569011585cb323b7774502a5 \ - --hash=sha256:3614d280bf7aab0d3721b5ce0e73434acb90a2c993121b6e81a1c15c665298ac \ - --hash=sha256:3902df19540e9af4cc0c3ae75974c65d2c156b9257e91f5101a51f99136d834c \ - --hash=sha256:3aaf141d39f45322e44fc2c742e4b8b4098ead5317e5f884770c8df0c332da70 \ - --hash=sha256:3d8abf7896a91fb97e7977d1aadfcc2c80415d6dc2f1d0fca5b8d0df247248f3 \ - --hash=sha256:3e77febf227a1dc3220159355dba68faa13f8dca9335d97504abf428469fb18b \ - --hash=sha256:3e9212f52074fc9d72cf242a84063787ab8e21e0950d4d6709886fb62bcb91d5 \ - --hash=sha256:3ee9d6f0b38efb22ad94c3b68ffebe4c47865cdf4b17f6806d6c674e1feb4246 \ - --hash=sha256:4233df01a250b3984465faed12ad472f035b7cd5240ea3f7c76b7a7016084495 \ - --hash=sha256:4263320ed887ed843f85beba67f8b2d1483b5947f2dc73a8b068924558bfeace \ - --hash=sha256:4ab923167cfd945abb9b51a407407cf19f5bee35001221f2911dc85ffd35ff4f \ - --hash=sha256:4caafd1a22e5eaa3732acb7672a497123354bef79a9d7ceed43387d25025e935 \ - --hash=sha256:50fb62f8d8364978478b12d5f03bf028c6bc2af04082479299139dc26edf4c64 \ - --hash=sha256:55ff4151cfd4bc635e51cfb1c59ac9f7196b256b12e3a57deb9e5742e65941ad \ - --hash=sha256:5b98b6c953e5c2bda51ab4d5b4f172617d462eebc7f4bfdc7c7e6b423f6da957 \ - 
--hash=sha256:5c9ff044eb07c8468594d12602291c635da292308c8c619244e30698e7fc455a \ - --hash=sha256:5e9c206a1abc27e0588cf8b7c8246e51f1a16a103734f7750830a1ccb63f557a \ - --hash=sha256:5fb89edee2fa237584e532fbf78f0ddd1e49a47c7c8cfa153ab4849dc72a35e6 \ - --hash=sha256:633462ef7e61d839171bf206551d5ab42b30b71cac8f10a64a662536e057fdef \ - --hash=sha256:66f8d2a17e5838dd6fb9be6baaba8e75ae2f5fa6b6b755d597184bfcd3cb0eba \ - --hash=sha256:6959bb9928c5c999aba4a3f5a6799d571ddc2c59ff49917ecf55be2bbb4e3722 \ - --hash=sha256:698a79d295626ee292d1730bc2ef6e70a3ab135b1d79ada8fde3ed0047b65a10 \ - --hash=sha256:721f9c4011b443b6e84505fc00cc7aadc9d1743f1c988e4c89353e19c4a968ee \ - --hash=sha256:72e680c1518733b73c994361e4b06441b92e973ef7d9449feec72e8ee4f713da \ - --hash=sha256:75307599f0d25bf6937248e5ac4e3bde5ea72ae6618623b86146ccc7845ed00b \ - --hash=sha256:754fba3084b70162a6b91efceee8a3f06b19e43dac3f71841662053c0584209a \ - --hash=sha256:759462b2d0aa5a04be5b3e37fb8183615f47014ae6b116e17036b131985cb731 \ - --hash=sha256:7938c7b0599a05246d704b3f5e01be91a93b411d0d6cc62275f025293b8a11ce \ - --hash=sha256:7b77e07233925bd33fc0022b8537774423e4c6680b6436316c5075e79b6384f4 \ - --hash=sha256:7e5413d2e2d86025e73f05510ad23dad5950ab8417b7fc6beaad99be8077138b \ - --hash=sha256:7f3240dcfa14d198dba24b8b9cb3b108c06b68d45b7babd9eefc1038fdf7e707 \ - --hash=sha256:7f9682a8f71acdf59fd554b82b1c12f517118ee72c0f3944eda461606dfe7eb9 \ - --hash=sha256:8d67beb6002441faef8251c45e24994de32c4c8686f7356a1f601ad7c466f7c3 \ - --hash=sha256:9441af1d25aed96901f97ad83d5c3e35e6cd21a25ca5e4916c82d7dd0490a4fa \ - --hash=sha256:98b257ae1e83f81fb947a363a274c4eb66640212516becaff7bef09a5dceacaa \ - --hash=sha256:9e9f3a3ac919406bc0414bbbd76c6af99253c507150191ea79fab42fdb35982a \ - --hash=sha256:a1c66e71ecfd2a4acf0e4bd75e7a3605afa8f9b28a3b497e4ba962719df2be57 \ - --hash=sha256:a1e17d8dc8e57d8e0fd21f8f0f0a5211b3fa258b2e444c2053471ef93fe25a00 \ - --hash=sha256:a20cb698c4a59c534c6701b1c24a968ff2768b18ea2991f886bd8985ce17a89f \ - 
--hash=sha256:a970bfaf130c29a679b1d0a6e0f867483cea455ab1535fb427566a475078f27f \ - --hash=sha256:a98f510d86f689fcb486dc59e6e363af04151e5260ad1bdddb5625c10f1e95f8 \ - --hash=sha256:a9d3b728f5a5873d84cba997b9d617c6090ca5721caaa691f3b1a78c60adc057 \ - --hash=sha256:ad76f44f70aac3a54ceb1813ca630c53415da3a24fd93c570b2dfb4856591017 \ - --hash=sha256:ae28144c1daa61366205d32abd8c90372790ff79fc60c1a8ad7fd3c8553a600e \ - --hash=sha256:b03a8d50b137ee758e4c73638b10747b7c39988eb8e6cd11abb7084266455165 \ - --hash=sha256:b5a96fcac2f18e5a0a23a75cd27ce2656c66c11c127b0318e508aab436b77428 \ - --hash=sha256:b5ef909a37e9738d146519657a1aab4584018746a18f71c692f2f22168ece40c \ - --hash=sha256:b79f5ced71efd70414a9a80bbbfaa7160da307723166f09b69773153bf17c590 \ - --hash=sha256:b91cceb5add79ee563bd1f70b30896bd63bc5f78a11c1f00a1e931729ca4f1f4 \ - --hash=sha256:b92f5654157de1379c509b15acec9d12ecf6e3bc1996571b6cb82a4302060447 \ - --hash=sha256:c04ca91dda8a61584165825907f5c967ca09e9c65fe8966ee753a3f2b019fe1e \ - --hash=sha256:c1f8afa346ccd59e4e5630d5abb67aba6a9812fddf764fd7eb11f382a345f8cc \ - --hash=sha256:c5334a71f7dc1160382d45997e29f2637c02f8a26af41073189d79b95d3321f1 \ - --hash=sha256:c617d7453a80e29d9973b926983b1e700a9377dbe021faa36041c78537d7b08c \ - --hash=sha256:c632419c3870507ca20a37c8f8f5352317aca097639e524ad129f58c125c61c6 \ - --hash=sha256:c6760211eee3a76316cf328f5a8bd695b47b1626d21c8a27fb3b2473a884d597 \ - --hash=sha256:c698d123ce5d8f2d0cd17f73336615f6a2e3bdcedac07a1291bb4d8e7d82a05a \ - --hash=sha256:c76b32eb2ab650a29e423525e84eb197c45504b1c1e6e17b6cc91fcfeb1a4b1d \ - --hash=sha256:c8f7e90b948dc9dcfff8003f1ea3af08b29c062f681c05fd798e36daa3f7e3e8 \ - --hash=sha256:c9e799dac1ffbe7b10c1fd42fe4cd51371a549c6e108249bde9cd1200e8f59b4 \ - --hash=sha256:cafa48f2133d4daa028473ede7d81cd1b9f9e6925e9e4003ebdf77010ee02f35 \ - --hash=sha256:ce473a2351c018b06dd8d30d5da8ab5a0831056cc53b2006e2a8028172c37ce5 \ - --hash=sha256:d31ed4987d72aabdf521eddfb6a72988703c091cfc0064330b9e5f8d6a042ff5 \ - 
--hash=sha256:d550d7e9e7d8676b183b37d65b5cd8de13676a738973d330b59dc8312df9c5dc \ - --hash=sha256:d6adb81564af0cd428910f83fa7da46ce9ad47c56c0b22b50872bc4515d91966 \ - --hash=sha256:d6f6512a90bd5cd9030a6237f5346f046c6f0e40af98657568fa45695d4de59d \ - --hash=sha256:d7031d493c4465dbc8d40bd6cafefef4bd472b17db0ab94c53e7909ee781b9ef \ - --hash=sha256:d9f75a06ecc68f159d5d7603b734e1ff6daa9497a929150f794013aa9f6e3f12 \ - --hash=sha256:db7707dde9143a67b8812c7e66aeb2d843fe33cc8e374170f4d2c50bd8f2472d \ - --hash=sha256:e0397dd0b3955c61ef9b22838144aa4bef6f0796ba5cc8edfc64d468b93798b4 \ - --hash=sha256:e0df046f2266e8586cf09d00588302a32923eb6386ced0ca5c9deade6af9a149 \ - --hash=sha256:e14f86b871ea74c3fddc9a40e947d6a5d09def5adc2076ee61fb910a9014fb35 \ - --hash=sha256:e5963ea87f88bddf7edd59644a35a0feecf75f8985430124c253612d4f7d27ae \ - --hash=sha256:e768267cbe051dd8d1c5305ba690bb153204a09bf2e3de3ae530de955f5b5580 \ - --hash=sha256:e9cb79ecedfc156c0692257ac7ed415243b6c35dd969baa461a6888fc79f2f07 \ - --hash=sha256:ed6f011bedca8585787e5082cce081bac3d30f54520097b2411351b3574e1219 \ - --hash=sha256:f3429fb8e15b20961efca8c8b21432623d85db2228cc73fe22756c6637aa39e7 \ - --hash=sha256:f35eff113ad430b5272bbfc18ba111c66ff525828f24898b4e146eb479a2cdda \ - --hash=sha256:f3a6cb95074777f1ecda2ca4fa7717caa9ee6e534f42b7575a8f0d4cb0c24013 \ - --hash=sha256:f7356a6da0562190558c4fcc14f0281db191cdf4cb96e7604c06acfcee96df15 \ - --hash=sha256:f88626e3f5e57432e6191cd0c5d6d6b319b635e70b40be2ffba713053e5147dd \ - --hash=sha256:fad784a31869747df4ac968a351e070c06ca377549e4ace94775aaa3ab33ee06 \ - --hash=sha256:fc869af5cba24d45fb0399b0cfdbcefcf6910bf4dee5d74036a57cf5264b3ff4 \ - --hash=sha256:fee513135b5a58f3bb6d89e48326cd5aa308e4bcdf2f7d59f67c861ada482bf8 -ruff==0.4.10 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +rpds-py==0.24.0 ; python_version >= "3.10" and python_version < "4" \ + 
--hash=sha256:0047638c3aa0dbcd0ab99ed1e549bbf0e142c9ecc173b6492868432d8989a046 \ + --hash=sha256:006f4342fe729a368c6df36578d7a348c7c716be1da0a1a0f86e3021f8e98724 \ + --hash=sha256:041f00419e1da7a03c46042453598479f45be3d787eb837af382bfc169c0db33 \ + --hash=sha256:04ecf5c1ff4d589987b4d9882872f80ba13da7d42427234fce8f22efb43133bc \ + --hash=sha256:04f2b712a2206e13800a8136b07aaedc23af3facab84918e7aa89e4be0260032 \ + --hash=sha256:0aeb3329c1721c43c58cae274d7d2ca85c1690d89485d9c63a006cb79a85771a \ + --hash=sha256:0e374c0ce0ca82e5b67cd61fb964077d40ec177dd2c4eda67dba130de09085c7 \ + --hash=sha256:0f00c16e089282ad68a3820fd0c831c35d3194b7cdc31d6e469511d9bffc535c \ + --hash=sha256:174e46569968ddbbeb8a806d9922f17cd2b524aa753b468f35b97ff9c19cb718 \ + --hash=sha256:1b221c2457d92a1fb3c97bee9095c874144d196f47c038462ae6e4a14436f7bc \ + --hash=sha256:208b3a70a98cf3710e97cabdc308a51cd4f28aa6e7bb11de3d56cd8b74bab98d \ + --hash=sha256:20f2712bd1cc26a3cc16c5a1bfee9ed1abc33d4cdf1aabd297fe0eb724df4272 \ + --hash=sha256:24795c099453e3721fda5d8ddd45f5dfcc8e5a547ce7b8e9da06fecc3832e26f \ + --hash=sha256:2a0f156e9509cee987283abd2296ec816225145a13ed0391df8f71bf1d789e2d \ + --hash=sha256:2b2356688e5d958c4d5cb964af865bea84db29971d3e563fb78e46e20fe1848b \ + --hash=sha256:2c13777ecdbbba2077670285dd1fe50828c8742f6a4119dbef6f83ea13ad10fb \ + --hash=sha256:2d3ee4615df36ab8eb16c2507b11e764dcc11fd350bbf4da16d09cda11fcedef \ + --hash=sha256:2d53747da70a4e4b17f559569d5f9506420966083a31c5fbd84e764461c4444b \ + --hash=sha256:32bab0a56eac685828e00cc2f5d1200c548f8bc11f2e44abf311d6b548ce2e45 \ + --hash=sha256:34d90ad8c045df9a4259c47d2e16a3f21fdb396665c94520dbfe8766e62187a4 \ + --hash=sha256:369d9c6d4c714e36d4a03957b4783217a3ccd1e222cdd67d464a3a479fc17796 \ + --hash=sha256:3a55fc10fdcbf1a4bd3c018eea422c52cf08700cf99c28b5cb10fe97ab77a0d3 \ + --hash=sha256:3d2d8e4508e15fc05b31285c4b00ddf2e0eb94259c2dc896771966a163122a0c \ + --hash=sha256:3fab5f4a2c64a8fb64fc13b3d139848817a64d467dd6ed60dcdd6b479e7febc9 \ + 
--hash=sha256:43dba99f00f1d37b2a0265a259592d05fcc8e7c19d140fe51c6e6f16faabeb1f \ + --hash=sha256:44d51febb7a114293ffd56c6cf4736cb31cd68c0fddd6aa303ed09ea5a48e029 \ + --hash=sha256:493fe54318bed7d124ce272fc36adbf59d46729659b2c792e87c3b95649cdee9 \ + --hash=sha256:4b28e5122829181de1898c2c97f81c0b3246d49f585f22743a1246420bb8d399 \ + --hash=sha256:4cd031e63bc5f05bdcda120646a0d32f6d729486d0067f09d79c8db5368f4586 \ + --hash=sha256:528927e63a70b4d5f3f5ccc1fa988a35456eb5d15f804d276709c33fc2f19bda \ + --hash=sha256:564c96b6076a98215af52f55efa90d8419cc2ef45d99e314fddefe816bc24f91 \ + --hash=sha256:5db385bacd0c43f24be92b60c857cf760b7f10d8234f4bd4be67b5b20a7c0b6b \ + --hash=sha256:5ef877fa3bbfb40b388a5ae1cb00636a624690dcb9a29a65267054c9ea86d88a \ + --hash=sha256:5f6e3cec44ba05ee5cbdebe92d052f69b63ae792e7d05f1020ac5e964394080c \ + --hash=sha256:5fc13b44de6419d1e7a7e592a4885b323fbc2f46e1f22151e3a8ed3b8b920405 \ + --hash=sha256:60748789e028d2a46fc1c70750454f83c6bdd0d05db50f5ae83e2db500b34da5 \ + --hash=sha256:60d9b630c8025b9458a9d114e3af579a2c54bd32df601c4581bd054e85258143 \ + --hash=sha256:619ca56a5468f933d940e1bf431c6f4e13bef8e688698b067ae68eb4f9b30e3a \ + --hash=sha256:630d3d8ea77eabd6cbcd2ea712e1c5cecb5b558d39547ac988351195db433f6c \ + --hash=sha256:63981feca3f110ed132fd217bf7768ee8ed738a55549883628ee3da75bb9cb78 \ + --hash=sha256:66420986c9afff67ef0c5d1e4cdc2d0e5262f53ad11e4f90e5e22448df485bf0 \ + --hash=sha256:675269d407a257b8c00a6b58205b72eec8231656506c56fd429d924ca00bb350 \ + --hash=sha256:6a4a535013aeeef13c5532f802708cecae8d66c282babb5cd916379b72110cf7 \ + --hash=sha256:6a727fd083009bc83eb83d6950f0c32b3c94c8b80a9b667c87f4bd1274ca30ba \ + --hash=sha256:6e1daf5bf6c2be39654beae83ee6b9a12347cb5aced9a29eecf12a2d25fff664 \ + --hash=sha256:6eea559077d29486c68218178ea946263b87f1c41ae7f996b1f30a983c476a5a \ + --hash=sha256:75a810b7664c17f24bf2ffd7f92416c00ec84b49bb68e6a0d93e542406336b56 \ + --hash=sha256:772cc1b2cd963e7e17e6cc55fe0371fb9c704d63e44cacec7b9b7f523b78919e \ + 
--hash=sha256:78884d155fd15d9f64f5d6124b486f3d3f7fd7cd71a78e9670a0f6f6ca06fb2d \ + --hash=sha256:79e8d804c2ccd618417e96720ad5cd076a86fa3f8cb310ea386a3e6229bae7d1 \ + --hash=sha256:7e80d375134ddb04231a53800503752093dbb65dad8dabacce2c84cccc78e964 \ + --hash=sha256:8097b3422d020ff1c44effc40ae58e67d93e60d540a65649d2cdaf9466030791 \ + --hash=sha256:8205ee14463248d3349131bb8099efe15cd3ce83b8ef3ace63c7e976998e7124 \ + --hash=sha256:8212ff58ac6dfde49946bea57474a386cca3f7706fc72c25b772b9ca4af6b79e \ + --hash=sha256:823e74ab6fbaa028ec89615ff6acb409e90ff45580c45920d4dfdddb069f2120 \ + --hash=sha256:84e0566f15cf4d769dade9b366b7b87c959be472c92dffb70462dd0844d7cbad \ + --hash=sha256:896c41007931217a343eff197c34513c154267636c8056fb409eafd494c3dcdc \ + --hash=sha256:8aa362811ccdc1f8dadcc916c6d47e554169ab79559319ae9fae7d7752d0d60c \ + --hash=sha256:8b3b397eefecec8e8e39fa65c630ef70a24b09141a6f9fc17b3c3a50bed6b50e \ + --hash=sha256:8ebc7e65ca4b111d928b669713865f021b7773350eeac4a31d3e70144297baba \ + --hash=sha256:9168764133fd919f8dcca2ead66de0105f4ef5659cbb4fa044f7014bed9a1797 \ + --hash=sha256:921ae54f9ecba3b6325df425cf72c074cd469dea843fb5743a26ca7fb2ccb149 \ + --hash=sha256:92558d37d872e808944c3c96d0423b8604879a3d1c86fdad508d7ed91ea547d5 \ + --hash=sha256:951cc481c0c395c4a08639a469d53b7d4afa252529a085418b82a6b43c45c240 \ + --hash=sha256:998c01b8e71cf051c28f5d6f1187abbdf5cf45fc0efce5da6c06447cba997034 \ + --hash=sha256:9abc80fe8c1f87218db116016de575a7998ab1629078c90840e8d11ab423ee25 \ + --hash=sha256:9be4f99bee42ac107870c61dfdb294d912bf81c3c6d45538aad7aecab468b6b7 \ + --hash=sha256:9c39438c55983d48f4bb3487734d040e22dad200dab22c41e331cee145e7a50d \ + --hash=sha256:9d7e8ce990ae17dda686f7e82fd41a055c668e13ddcf058e7fb5e9da20b57793 \ + --hash=sha256:9ea7f4174d2e4194289cb0c4e172d83e79a6404297ff95f2875cf9ac9bced8ba \ + --hash=sha256:a18fc371e900a21d7392517c6f60fe859e802547309e94313cd8181ad9db004d \ + --hash=sha256:a36b452abbf29f68527cf52e181fced56685731c86b52e852053e38d8b60bc8d \ + 
--hash=sha256:a5b66d1b201cc71bc3081bc2f1fc36b0c1f268b773e03bbc39066651b9e18391 \ + --hash=sha256:a824d2c7a703ba6daaca848f9c3d5cb93af0505be505de70e7e66829affd676e \ + --hash=sha256:a88c0d17d039333a41d9bf4616bd062f0bd7aa0edeb6cafe00a2fc2a804e944f \ + --hash=sha256:aa6800adc8204ce898c8a424303969b7aa6a5e4ad2789c13f8648739830323b7 \ + --hash=sha256:aad911555286884be1e427ef0dc0ba3929e6821cbeca2194b13dc415a462c7fd \ + --hash=sha256:afc6e35f344490faa8276b5f2f7cbf71f88bc2cda4328e00553bd451728c571f \ + --hash=sha256:b9a4df06c35465ef4d81799999bba810c68d29972bf1c31db61bfdb81dd9d5bb \ + --hash=sha256:bb2954155bb8f63bb19d56d80e5e5320b61d71084617ed89efedb861a684baea \ + --hash=sha256:bbc4362e06f950c62cad3d4abf1191021b2ffaf0b31ac230fbf0526453eee75e \ + --hash=sha256:c0145295ca415668420ad142ee42189f78d27af806fcf1f32a18e51d47dd2052 \ + --hash=sha256:c30ff468163a48535ee7e9bf21bd14c7a81147c0e58a36c1078289a8ca7af0bd \ + --hash=sha256:c347a20d79cedc0a7bd51c4d4b7dbc613ca4e65a756b5c3e57ec84bd43505b47 \ + --hash=sha256:c43583ea8517ed2e780a345dd9960896afc1327e8cf3ac8239c167530397440d \ + --hash=sha256:c61a2cb0085c8783906b2f8b1f16a7e65777823c7f4d0a6aaffe26dc0d358dd9 \ + --hash=sha256:c9ca89938dff18828a328af41ffdf3902405a19f4131c88e22e776a8e228c5a8 \ + --hash=sha256:cc31e13ce212e14a539d430428cd365e74f8b2d534f8bc22dd4c9c55b277b875 \ + --hash=sha256:cdabcd3beb2a6dca7027007473d8ef1c3b053347c76f685f5f060a00327b8b65 \ + --hash=sha256:cf86f72d705fc2ef776bb7dd9e5fbba79d7e1f3e258bf9377f8204ad0fc1c51e \ + --hash=sha256:d09dc82af2d3c17e7dd17120b202a79b578d79f2b5424bda209d9966efeed114 \ + --hash=sha256:d3aa13bdf38630da298f2e0d77aca967b200b8cc1473ea05248f6c5e9c9bdb44 \ + --hash=sha256:d69d003296df4840bd445a5d15fa5b6ff6ac40496f956a221c4d1f6f7b4bc4d9 \ + --hash=sha256:d6e109a454412ab82979c5b1b3aee0604eca4bbf9a02693bb9df027af2bfa91a \ + --hash=sha256:d8551e733626afec514b5d15befabea0dd70a343a9f23322860c4f16a9430205 \ + --hash=sha256:d8754d872a5dfc3c5bf9c0e059e8107451364a30d9fd50f1f1a85c4fb9481164 \ + 
--hash=sha256:d8f9a6e7fd5434817526815f09ea27f2746c4a51ee11bb3439065f5fc754db58 \ + --hash=sha256:dbcbb6db5582ea33ce46a5d20a5793134b5365110d84df4e30b9d37c6fd40ad3 \ + --hash=sha256:e0f3ef95795efcd3b2ec3fe0a5bcfb5dadf5e3996ea2117427e524d4fbf309c6 \ + --hash=sha256:e13ae74a8a3a0c2f22f450f773e35f893484fcfacb00bb4344a7e0f4f48e1f97 \ + --hash=sha256:e274f62cbd274359eff63e5c7e7274c913e8e09620f6a57aae66744b3df046d6 \ + --hash=sha256:e838bf2bb0b91ee67bf2b889a1a841e5ecac06dd7a2b1ef4e6151e2ce155c7ae \ + --hash=sha256:e8acd55bd5b071156bae57b555f5d33697998752673b9de554dd82f5b5352727 \ + --hash=sha256:e8e5ab32cf9eb3647450bc74eb201b27c185d3857276162c101c0f8c6374e098 \ + --hash=sha256:ebcb786b9ff30b994d5969213a8430cbb984cdd7ea9fd6df06663194bd3c450c \ + --hash=sha256:ebea2821cdb5f9fef44933617be76185b80150632736f3d76e54829ab4a3b4d1 \ + --hash=sha256:ed0ef550042a8dbcd657dfb284a8ee00f0ba269d3f2286b0493b15a5694f9fe8 \ + --hash=sha256:eda5c1e2a715a4cbbca2d6d304988460942551e4e5e3b7457b50943cd741626d \ + --hash=sha256:f5c0ed12926dec1dfe7d645333ea59cf93f4d07750986a586f511c0bc61fe103 \ + --hash=sha256:f6016bd950be4dcd047b7475fdf55fb1e1f59fc7403f387be0e8123e4a576d30 \ + --hash=sha256:f9e0057a509e096e47c87f753136c9b10d7a91842d8042c2ee6866899a717c0d \ + --hash=sha256:fc1c892b1ec1f8cbd5da8de287577b455e388d9c328ad592eabbdcb6fc93bee5 \ + --hash=sha256:fc2c1e1b00f88317d9de6b2c2b39b012ebbfe35fe5e7bef980fd2a91f6100a07 \ + --hash=sha256:fd822f019ccccd75c832deb7aa040bb02d70a92eb15a2f16c7987b7ad4ee8d83 +ruff==0.4.10 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:0f54c481b39a762d48f64d97351048e842861c6662d63ec599f67d515cb417f6 \ --hash=sha256:18238c80ee3d9100d3535d8eb15a59c4a0753b45cc55f8bf38f38d6a597b9739 \ --hash=sha256:330421543bd3222cdfec481e8ff3460e8702ed1e58b494cf9d9e4bf90db52b9d \ @@ -459,7 +485,7 @@ ruff==0.4.10 ; python_version >= "3.10" and python_version <= "3.11" or python_v --hash=sha256:d8f71885bce242da344989cae08e263de29752f094233f932d4f5cfb4ef36a81 \ 
--hash=sha256:dd1fcee327c20addac7916ca4e2653fbbf2e8388d8a6477ce5b4e986b68ae6c0 \ --hash=sha256:ffe3cd2f89cb54561c62e5fa20e8f182c0a444934bf430515a4b422f1ab7b7ca -scikit-learn==1.6.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +scikit-learn==1.6.1 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:0650e730afb87402baa88afbf31c07b84c98272622aaba002559b614600ca691 \ --hash=sha256:0c8d036eb937dbb568c6242fa598d551d88fb4399c0344d95c001980ec1c7d36 \ --hash=sha256:1061b7c028a8663fb9a1a1baf9317b64a257fcb036dae5c8752b2abef31d136f \ @@ -490,7 +516,7 @@ scikit-learn==1.6.1 ; python_version >= "3.10" and python_version <= "3.11" or p --hash=sha256:e7be3fa5d2eb9be7d77c3734ff1d599151bb523674be9b834e8da6abe132f44e \ --hash=sha256:e8ca8cb270fee8f1f76fa9bfd5c3507d60c6438bbee5687f81042e2bb98e5a97 \ --hash=sha256:fa909b1a36e000a03c382aade0bd2063fd5680ff8b8e501660c0f59f021a6415 -scipy==1.15.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +scipy==1.15.2 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:01edfac9f0798ad6b46d9c4c9ca0e0ad23dbf0b1eb70e96adb9fa7f525eff0bf \ --hash=sha256:03205d57a28e18dfd39f0377d5002725bf1f19a46f444108c29bdb246b6c8a11 \ --hash=sha256:08b57a9336b8e79b305a143c3655cc5bdbe6d5ece3378578888d2afbb51c4e37 \ @@ -537,16 +563,16 @@ scipy==1.15.2 ; python_version >= "3.10" and python_version <= "3.11" or python_ --hash=sha256:f031846580d9acccd0044efd1a90e6f4df3a6e12b4b6bd694a7bc03a89892b28 \ --hash=sha256:fb530e4794fc8ea76a4a21ccb67dea33e5e0e60f07fc38a49e821e1eae3b71a0 \ --hash=sha256:fe8a9eb875d430d81755472c5ba75e84acc980e4a8f6204d402849234d3017db -six==1.17.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +six==1.17.0 ; python_version >= "3.10" and python_version < "4" \ 
--hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \ --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81 -stack-data==0.6.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +stack-data==0.6.3 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9 \ --hash=sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695 -threadpoolctl==3.6.0 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +threadpoolctl==3.6.0 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb \ --hash=sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e -tomli==2.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +tomli==2.2.1 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \ --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \ --hash=sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c \ @@ -579,25 +605,25 @@ tomli==2.2.1 ; python_version >= "3.10" and python_version <= "3.11" or python_v --hash=sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272 \ --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 -tomlkit==0.13.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +tomlkit==0.13.2 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde \ 
--hash=sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79 -traitlets==5.14.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +traitlets==5.14.3 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7 \ --hash=sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f -typing-extensions==4.12.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ - --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ - --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 -tzdata==2025.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +typing-extensions==4.13.1 ; python_version >= "3.10" and python_version < "3.13" \ + --hash=sha256:4b6cf02909eb5495cfbc3f6e8fd49217e6cc7944e145cdda8caa3734777f9e69 \ + --hash=sha256:98795af00fb9640edec5b8e31fc647597b4691f099ad75f469a2616be1a76dff +tzdata==2025.2 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8 \ --hash=sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9 -virtualenv==20.29.3 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ - --hash=sha256:3e3d00f5807e83b234dfb6122bf37cfadf4be216c53a49ac059d02414f819170 \ - --hash=sha256:95e39403fcf3940ac45bc717597dba16110b74506131845d9b687d5e73d947ac -wcwidth==0.2.13 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +virtualenv==20.30.0 ; python_version >= "3.10" and python_version < "4" \ + --hash=sha256:800863162bcaa5450a6e4d721049730e7f2dae07720e0902b0e4040bd6f9ada8 \ + 
--hash=sha256:e34302959180fca3af42d1800df014b35019490b119eba981af27f2fa486e5d6 +wcwidth==0.2.13 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859 \ --hash=sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5 -xmltodict==0.14.2 ; python_version >= "3.10" and python_version <= "3.11" or python_version >= "3.12" and python_version < "4" \ +xmltodict==0.14.2 ; python_version >= "3.10" and python_version < "4" \ --hash=sha256:201e7c28bb210e374999d1dde6382923ab0ed1a8a5faeece48ab525b7810a553 \ --hash=sha256:20cc7d723ed729276e808f26fb6b3599f786cbc37e06c65e192ba77c40f20aac zipp==3.21.0 ; python_version >= "3.10" and python_full_version < "3.10.2" \ diff --git a/doc/source/dev-environment.rst b/doc/source/dev-environment.rst index 570a8c970..0adb3c83e 100644 --- a/doc/source/dev-environment.rst +++ b/doc/source/dev-environment.rst @@ -78,7 +78,7 @@ Steps without :bash:`sudo` access (e.g. on a cluster): * Verify that your development environment has no missing installs or updates with :bash:`poetry install --sync --dry-run --with test`. #. Check if the environment is setup correctly by running :bash:`pytest`. All tests should pass, except if you're not on a GPU node, or one or more extras has been left out in the previous step, then these tests will skip gracefully. #. Set Nox to use the correct backend and location: - * Run :bash:`conda -- create-settings-file` to automatically create a settings file. + * Run :bash:`nox -- create-settings-file` to automatically create a settings file. * In this settings file :bash:`noxsettings.toml`, change the :bash:`venvbackend`: * If you used Mamba in step 2, to :bash:`mamba`. * If you used Miniconda or Anaconda in step 2, to :bash:`conda`. 
diff --git a/examples/c/vector_add.py b/examples/c/vector_add.py old mode 100755 new mode 100644 diff --git a/examples/cuda-c++/vector_add.py b/examples/cuda-c++/vector_add.py old mode 100755 new mode 100644 diff --git a/examples/cuda-c++/vector_add_blocksize.py b/examples/cuda-c++/vector_add_blocksize.py old mode 100755 new mode 100644 diff --git a/examples/cuda-c++/vector_add_cupy.py b/examples/cuda-c++/vector_add_cupy.py old mode 100755 new mode 100644 diff --git a/examples/cuda/convolution.py b/examples/cuda/convolution.py old mode 100755 new mode 100644 diff --git a/examples/cuda/convolution_correct.py b/examples/cuda/convolution_correct.py old mode 100755 new mode 100644 diff --git a/examples/cuda/convolution_streams.py b/examples/cuda/convolution_streams.py old mode 100755 new mode 100644 diff --git a/examples/cuda/expdist.py b/examples/cuda/expdist.py old mode 100755 new mode 100644 diff --git a/examples/cuda/matmul.py b/examples/cuda/matmul.py old mode 100755 new mode 100644 diff --git a/examples/cuda/pnpoly.py b/examples/cuda/pnpoly.py old mode 100755 new mode 100644 diff --git a/examples/cuda/python_kernel.py b/examples/cuda/python_kernel.py old mode 100755 new mode 100644 diff --git a/examples/cuda/reduction.py b/examples/cuda/reduction.py old mode 100755 new mode 100644 diff --git a/examples/cuda/sepconv.py b/examples/cuda/sepconv.py old mode 100755 new mode 100644 diff --git a/examples/cuda/spmv.py b/examples/cuda/spmv.py old mode 100755 new mode 100644 diff --git a/examples/cuda/stencil.py b/examples/cuda/stencil.py old mode 100755 new mode 100644 diff --git a/examples/cuda/test_vector_add.py b/examples/cuda/test_vector_add.py old mode 100755 new mode 100644 diff --git a/examples/cuda/test_vector_add_parameterized.py b/examples/cuda/test_vector_add_parameterized.py old mode 100755 new mode 100644 diff --git a/examples/cuda/vector_add.py b/examples/cuda/vector_add.py old mode 100755 new mode 100644 diff --git a/examples/cuda/vector_add_codegen.py 
b/examples/cuda/vector_add_codegen.py old mode 100755 new mode 100644 diff --git a/examples/cuda/vector_add_cupy.py b/examples/cuda/vector_add_cupy.py old mode 100755 new mode 100644 diff --git a/examples/cuda/vector_add_custom_strategy.py b/examples/cuda/vector_add_custom_strategy.py new file mode 100644 index 000000000..29d873d5d --- /dev/null +++ b/examples/cuda/vector_add_custom_strategy.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +"""This is the minimal example from the README""" + +import numpy +import kernel_tuner +from kernel_tuner import tune_kernel +from kernel_tuner.file_utils import store_output_file, store_metadata_file + +def tune(): + + kernel_string = """ + __global__ void vector_add(float *c, float *a, float *b, int n) { + int i = blockIdx.x * block_size_x + threadIdx.x; + if (i 0 else kernel_instance.kernel_source.kernel_name - experiments_filepath = generate_experiment_file(name, path, searchspace_strategies, applications, gpus, - override=override, overwrite_existing_file=True) + experiments_filepath = generate_experiment_file(name, path, searchspace_strategies, self.applications, self.gpus, + override=self.override, generate_unique_file=True, overwrite_existing_file=True) return str(experiments_filepath) def start_event(self): @@ -114,12 +150,27 @@ def synchronize(self): return super().synchronize() def run_kernel(self, func, gpu_args=None, threads=None, grid=None, stream=None): + # from cProfile import Profile + + # # generate the experiments file + # experiments_filepath = Path(func) + + # # run the methodology to get a fitness score for this configuration + # with Profile() as pr: + # scores = get_strategy_scores(str(experiments_filepath), full_validate_on_load=False) + # pr.dump_stats('diff_evo_hypertune_hotspot.prof') + # self.last_score = scores[list(scores.keys())[0]]['score'] + # raise ValueError(scores) + # generate the experiments file experiments_filepath = Path(func) # run the methodology to get a fitness score for this configuration 
- scores = get_strategy_scores(str(experiments_filepath)) + scores = get_strategy_scores(str(experiments_filepath), full_validate_on_load=False) self.last_score = scores[list(scores.keys())[0]]['score'] + + # remove the experiments file + experiments_filepath.unlink() def memset(self, allocation, value, size): return super().memset(allocation, value, size) @@ -129,3 +180,7 @@ def memcpy_dtoh(self, dest, src): def memcpy_htod(self, dest, src): return super().memcpy_htod(dest, src) + + def refresh_memory(self, device_memory, host_arguments, should_sync): + """This is a no-op for the hypertuner backend, as it does not manage memory directly.""" + pass diff --git a/kernel_tuner/core.py b/kernel_tuner/core.py index 9b9318cd2..5352ced74 100644 --- a/kernel_tuner/core.py +++ b/kernel_tuner/core.py @@ -315,10 +315,13 @@ def __init__( observers=observers, ) elif lang.upper() == "HYPERTUNER": - dev = HypertunerFunctions(iterations=iterations) + dev = HypertunerFunctions( + iterations=iterations, + compiler_options=compiler_options + ) self.requires_warmup = False else: - raise ValueError( + raise NotImplementedError( "Sorry, support for languages other than CUDA, OpenCL, HIP, C, and Fortran is not implemented yet" ) self.dev = dev diff --git a/kernel_tuner/file_utils.py b/kernel_tuner/file_utils.py index 2b75cc023..7684eeb84 100644 --- a/kernel_tuner/file_utils.py +++ b/kernel_tuner/file_utils.py @@ -3,6 +3,7 @@ import json import subprocess from importlib.metadata import PackageNotFoundError, requires, version +from importlib.util import spec_from_file_location, module_from_spec from pathlib import Path from sys import platform @@ -152,7 +153,7 @@ def get_t4_results(results, tune_params, objective="time"): # write output_data to a JSON file version, _ = output_file_schema("results") - output_json = dict(results=output_data, schema_version=version, metadata={'timeunit': 'miliseconds'}) + output_json = dict(results=output_data, schema_version=version, metadata={'timeunit': 
'milliseconds'}) return output_json def store_output_file(output_filename: str, results, tune_params, objective="time"): @@ -302,3 +303,25 @@ def store_metadata_file(metadata_filename: str): with open(metadata_filenamepath, "w+") as fh: json.dump(metadata_json, fh, indent=" ") +def import_class_from_file(file_path: Path, class_name): + """Import a class from a file.""" + + def load_module(module_name): + spec = spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Could not load spec from {file_path}") + + # create a module from the spec and execute it + module = module_from_spec(spec) + spec.loader.exec_module(module) + if not hasattr(module, class_name): + raise ImportError(f"Module '{module_name}' has no class '{class_name}'") + return module + + try: + module = load_module(file_path.stem) + except ImportError: + module = load_module(f"{file_path.parent.stem}.{file_path.stem}") + + # return the class from the module + return getattr(module, class_name) diff --git a/kernel_tuner/hyper.py b/kernel_tuner/hyper.py index 867f2ac0e..90b61a9b9 100644 --- a/kernel_tuner/hyper.py +++ b/kernel_tuner/hyper.py @@ -3,6 +3,9 @@ from pathlib import Path from random import randint +from argparse import ArgumentParser + +import numpy as np import kernel_tuner @@ -16,7 +19,7 @@ def randpath(): path = randpath() return path -def tune_hyper_params(target_strategy: str, hyper_params: dict, *args, **kwargs): +def tune_hyper_params(target_strategy: str, hyper_params: dict, restrictions: list, *args, **kwargs): """Tune hyperparameters for a given strategy and kernel. 
This function is to be called just like tune_kernel, except that you specify a strategy @@ -59,25 +62,32 @@ def tune_hyper_params(target_strategy: str, hyper_params: dict, *args, **kwargs) del kwargs['iterations'] # pass a temporary cache file to avoid duplicate execution - cachefile = get_random_unique_filename('temp_', '.json') - kwargs['cache'] = str(cachefile) + if 'cache' not in kwargs: + cachefile = get_random_unique_filename('temp_', '.json') + cachefile = Path(f"hyperparamtuning_paper_bruteforce_{target_strategy}.json") + kwargs['cache'] = str(cachefile) def put_if_not_present(target_dict, key, value): target_dict[key] = value if key not in target_dict else target_dict[key] + # set default arguments if not provided put_if_not_present(kwargs, "verbose", True) put_if_not_present(kwargs, "quiet", False) - kwargs['simulation_mode'] = False - kwargs['strategy'] = 'dual_annealing' - kwargs['verify'] = None + put_if_not_present(kwargs, "simulation_mode", False) + put_if_not_present(kwargs, "strategy", 'brute_force') + put_if_not_present(kwargs, 'verify', None) arguments = [target_strategy] + # IMPORTANT when running this script in parallel, always make sure the below name is unique among your runs! + # e.g. 
when parallelizing over the hypertuning of multiple strategies, use the strategy name
+    name = f"hyperparamtuning_{target_strategy.lower()}"
+
     # execute the hyperparameter tuning
-    result, env = kernel_tuner.tune_kernel('hyperparamtuning', None, [], arguments, hyper_params, *args, lang='Hypertuner',
+    result, env = kernel_tuner.tune_kernel(name, None, [], arguments, hyper_params, restrictions=restrictions, *args, lang='Hypertuner',
                                            objective='score', objective_higher_is_better=True, iterations=iterations, **kwargs)
 
     # remove the temporary cachefile and return only unique results in order
-    cachefile.unlink()
+    # cachefile.unlink()
     result_unique = dict()
     for r in result:
         config_id = ",".join(str(r[k]) for k in hyper_params.keys())
@@ -86,13 +96,120 @@ def put_if_not_present(target_dict, key, value):
     return list(result_unique.values()), env
 
 if __name__ == "__main__":
-    hyperparams = {
-        'popsize': [10, 20, 30],
-        'maxiter': [50, 100, 150],
-        'w': [0.25, 0.5, 0.75],
-        'c1': [1.0, 2.0, 3.0],
-        'c2': [0.5, 1.0, 1.5]
-    }
-    result, env = tune_hyper_params('pso', hyperparams)
+    """Main function to run the hyperparameter tuning.
Run with `python hyper.py strategy_to_tune=`.""" + + parser = ArgumentParser() + parser.add_argument("strategy_to_tune", type=str, help="The strategy to tune hyperparameters for.") + parser.add_argument("--meta_strategy", nargs='?', default="dual_annealing", type=str, help="The meta-strategy to use for hyperparameter tuning.") + parser.add_argument("--max_time", nargs='?', default=60*60*24, type=int, help="The maximum time in seconds for the hyperparameter tuning.") + args = parser.parse_args() + strategy_to_tune = args.strategy_to_tune + + kwargs = dict( + verbose=True, + quiet=False, + simulation_mode=False, + strategy=args.meta_strategy, + cache=f"hyperparamtuning_t={strategy_to_tune}_m={args.meta_strategy}.json", + strategy_options=dict( + time_limit=args.max_time, + ) + ) + + # select the hyperparameter parameters for the selected optimization algorithm + restrictions = [] + if strategy_to_tune.lower() == "pso": + # exhaustive search for PSO hyperparameters + # hyperparams = { + # 'popsize': [10, 20, 30], + # 'maxiter': [50, 100, 150], + # # 'w': [0.25, 0.5, 0.75], # disabled due to low influence according to KW-test (H=0.0215) and mutual information + # 'c1': [1.0, 2.0, 3.0], + # 'c2': [0.5, 1.0, 1.5] + # } + hyperparams = { + 'popsize': list(range(2, 50+1, 2)), + 'maxiter': list(range(10, 200, 10)), + # 'w': [0.25, 0.5, 0.75], # disabled due to low influence according to KW-test (H=0.0215) and mutual information + 'c1': [round(n, 2) for n in np.arange(1.0, 3.5+0.25, 0.25).tolist()], + 'c2': [round(n, 2) for n in np.arange(0.5, 2.0+0.25, 0.25).tolist()] + } + elif strategy_to_tune.lower() == "firefly_algorithm": + hyperparams = { + 'popsize': [10, 20, 30], + 'maxiter': [50, 100, 150], + 'B0': [0.5, 1.0, 1.5], + 'gamma': [0.1, 0.25, 0.5], + 'alpha': [0.1, 0.2, 0.3] + } + elif strategy_to_tune.lower() == "greedy_ils": + hyperparams = { + 'neighbor': ['Hamming', 'adjacent'], + 'restart': [True, False], + 'no_improvement': [10, 25, 50, 75], + 'random_walk': [0.1, 
0.2, 0.3, 0.4, 0.5] + } + elif strategy_to_tune.lower() == "dual_annealing": + hyperparams = { + 'method': ['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 'BFGS', 'trust-constr'], + } + elif strategy_to_tune.lower() == "diff_evo": + hyperparams = { + 'method': ["best1bin", "rand1bin", "best2bin", "rand2bin", "best1exp", "rand1exp", "best2exp", "rand2exp", "currenttobest1bin", "currenttobest1exp", "randtobest1bin", "randtobest1exp"], # best1bin + 'popsize': list(range(2, 50+1, 2)), # 50 + 'popsize_times_dimensions': [True, False], # False + 'F': [round(n, 2) for n in np.arange(0.1, 2.0+0.1, 0.1).tolist()], # 1.3 + 'CR': [round(n, 2) for n in np.arange(0.05, 1.0+0.05, 0.05).tolist()] # 0.9 + } + elif strategy_to_tune.lower() == "basinhopping": + hyperparams = { + 'method': ["Nelder-Mead", "Powell", "CG", "BFGS", "L-BFGS-B", "TNC", "COBYLA", "SLSQP"], + 'T': [0.1, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5], + } + elif strategy_to_tune.lower() == "genetic_algorithm": + # hyperparams = { + # 'method': ["single_point", "two_point", "uniform", "disruptive_uniform"], + # 'popsize': [10, 20, 30], + # 'maxiter': [50, 100, 150], + # 'mutation_chance': [5, 10, 20] + # } + hyperparams = { + 'method': ["single_point", "two_point", "uniform", "disruptive_uniform"], + 'popsize': list(range(2, 50+1, 2)), + 'maxiter': list(range(10, 200, 10)), + 'mutation_chance': list(range(5, 100, 5)) + } + elif strategy_to_tune.lower() == "greedy_mls": + hyperparams = { + 'neighbor': ["Hamming", "adjacent"], + 'restart': [True, False], + 'randomize': [True, False] + } + elif strategy_to_tune.lower() == "simulated_annealing": + # hyperparams = { + # 'T': [0.5, 1.0, 1.5], + # 'T_min': [0.0001, 0.001, 0.01], + # 'alpha': [0.9925, 0.995, 0.9975], + # 'maxiter': [1, 2, 3] + # } + hyperparams = { + 'T': [round(n, 2) for n in np.arange(0.1, 2.0+0.1, 0.1).tolist()], + 'T_min': [round(n, 4) for n in np.arange(0.0001, 0.1, 0.001).tolist()], + 'alpha': [0.9925, 0.995, 0.9975], + 'maxiter': 
list(range(1, 10, 1)) + } + elif strategy_to_tune.lower() == "bayes_opt": + hyperparams = { + # 'covariancekernel': ["constantrbf", "rbf", "matern32", "matern52"], + 'covariancelengthscale': [1.0, 1.5, 2.0], + 'method': ["poi", "ei", "lcb", "lcb-srinivas", "multi", "multi-advanced", "multi-fast", "multi-ultrafast"], + 'samplingmethod': ["random", "LHS"], + 'popsize': [10, 20, 30] + } + else: + raise ValueError(f"Invalid argument {strategy_to_tune=}") + + # run the hyperparameter tuning + result, env = tune_hyper_params(strategy_to_tune.lower(), hyperparams, restrictions=restrictions, **kwargs) print(result) print(env['best_config']) diff --git a/kernel_tuner/integration.py b/kernel_tuner/integration.py index adea812ed..b51a3eb36 100644 --- a/kernel_tuner/integration.py +++ b/kernel_tuner/integration.py @@ -4,7 +4,7 @@ from jsonschema import validate -from kernel_tuner import util +from kernel_tuner.util import get_instance_string, looks_like_a_filename, read_file #specifies for a number of pre-defined objectives whether #the objective should be minimized or maximized (boolean value denotes higher is better) @@ -208,8 +208,8 @@ def top_result(item): meta["version_number"] = "1.0" meta["kernel_name"] = kernel_name if kernel_string and not callable(kernel_string) and not isinstance(kernel_string, list): - if util.looks_like_a_filename(kernel_string): - meta["kernel_string"] = util.read_file(kernel_string) + if looks_like_a_filename(kernel_string): + meta["kernel_string"] = read_file(kernel_string) else: meta["kernel_string"] = kernel_string meta["objective"] = objective @@ -340,7 +340,7 @@ def _select_best_common_config(results, objective, objective_higher_is_better): for config in results: params = config["tunable_parameters"] - config_str = util.get_instance_string(params) + config_str = get_instance_string(params) #count occurances results_table[config_str] = results_table.get(config_str,0) + 1 #add to performance diff --git a/kernel_tuner/interface.py 
b/kernel_tuner/interface.py index a759dec07..32e91c86f 100644 --- a/kernel_tuner/interface.py +++ b/kernel_tuner/interface.py @@ -30,12 +30,14 @@ from datetime import datetime from pathlib import Path from time import perf_counter +from copy import deepcopy import numpy +from constraint import Constraint import kernel_tuner.core as core import kernel_tuner.util as util -from kernel_tuner.file_utils import get_input_file, get_t4_metadata, get_t4_results +from kernel_tuner.file_utils import get_input_file, get_t4_metadata, get_t4_results, import_class_from_file from kernel_tuner.integration import get_objective_defaults from kernel_tuner.runners.sequential import SequentialRunner from kernel_tuner.runners.simulation import SimulationRunner @@ -60,9 +62,11 @@ mls, ordered_greedy_mls, pso, + pyatf_strategies, random_sample, simulated_annealing, ) +from kernel_tuner.strategies.wrapper import OptAlgWrapper strategy_map = { "brute_force": brute_force, @@ -80,6 +84,7 @@ "simulated_annealing": simulated_annealing, "firefly_algorithm": firefly_algorithm, "bayes_opt": bayes_opt, + "pyatf_strategies": pyatf_strategies, } @@ -603,9 +608,6 @@ def tune_kernel( # ensure there is always at least three names util.append_default_block_size_names(block_size_names) - if iterations < 1: - raise ValueError("Iterations should be at least one!") - # sort all the options into separate dicts opts = locals() kernel_options = Options([(k, opts[k]) for k in _kernel_options.keys()]) @@ -630,34 +632,20 @@ def tune_kernel( logging.debug("device_options: %s", util.get_config_string(device_options)) # check whether the selected strategy and options are valid + strategy_string = strategy if strategy: if strategy in strategy_map: strategy = strategy_map[strategy] else: - raise ValueError(f"Unkown strategy {strategy}, must be one of: {', '.join(list(strategy_map.keys()))}") - - # make strategy_options into an Options object - if tuning_options.strategy_options: - if not isinstance(strategy_options, 
Options):
-            tuning_options.strategy_options = Options(strategy_options)
-
-        # select strategy based on user options
-        if "fraction" in tuning_options.strategy_options and not tuning_options.strategy == "random_sample":
-            raise ValueError(
-                'It is not possible to use fraction in combination with strategies other than "random_sample". '
-                'Please set strategy="random_sample", when using "fraction" in strategy_options'
-            )
-
-        # check if method is supported by the selected strategy
-        if "method" in tuning_options.strategy_options:
-            method = tuning_options.strategy_options.method
-            if method not in strategy.supported_methods:
-                raise ValueError("Method %s is not supported for strategy %s" % (method, tuning_options.strategy))
-
-        # if no strategy_options dict has been passed, create empty dictionary
-        else:
-            tuning_options.strategy_options = Options({})
+            # check for user-defined strategy
+            if hasattr(strategy, "tune") and callable(strategy.tune):
+                # user-defined strategy
+                pass
+            else:
+                raise ValueError(f"Unknown strategy {strategy}, must be one of: {', '.join(list(strategy_map.keys()))}")
 
+        # ensure strategy_options is an Options object
+        tuning_options.strategy_options = Options(strategy_options or {})
     # if no strategy selected
     else:
         strategy = brute_force
@@ -671,19 +659,23 @@ def tune_kernel(
     # we normalize it so that it always accepts atol.
tuning_options.verify = util.normalize_verify_function(tuning_options.verify) + def preprocess_cache(filepath): + if isinstance(filepath, Path): + filepath = str(filepath.resolve()) + if filepath[-5:] != ".json": + filepath += ".json" + return filepath + # process cache if cache: - if isinstance(cache, Path): - cache = str(cache.resolve()) - if cache[-5:] != ".json": - cache += ".json" - + cache = preprocess_cache(cache) util.process_cache(cache, kernel_options, tuning_options, runner) else: tuning_options.cache = {} tuning_options.cachefile = None # create search space + tuning_options.restrictions_unmodified = deepcopy(restrictions) searchspace = Searchspace(tune_params, restrictions, runner.dev.max_threads, **searchspace_construction_options) restrictions = searchspace._modified_restrictions tuning_options.restrictions = restrictions @@ -861,21 +853,67 @@ def _check_user_input(kernel_name, kernelsource, arguments, block_size_names): util.check_block_size_names(block_size_names) -def tune_kernel_T1(input_filepath: Path, cache_filepath: Path = None, simulation_mode = False, output_T4 = True, iterations = 7, strategy_options = None): - """Call the tune function with a T1 input file.""" +def tune_kernel_T1( + input_filepath: Path, + cache_filepath: Path = None, + objective="time", + objective_higher_is_better=False, + simulation_mode=False, + output_T4=True, + iterations=7, + device=None, + strategy: str=None, + strategy_options: dict={}, +) -> tuple: + """Call the tune function with a T1 input file. + + The device, strategy and strategy_options can be overridden by passing a strategy name and options, otherwise the input file specification is used. 
+ """ inputs = get_input_file(input_filepath) kernelspec: dict = inputs["KernelSpecification"] kernel_name: str = kernelspec["KernelName"] kernel_filepath = Path(kernelspec["KernelFile"]) kernel_source = ( - kernel_filepath if kernel_filepath.exists() else Path(input_filepath).parent.parent / kernel_filepath + kernel_filepath if kernel_filepath.exists() else Path(input_filepath).parent / kernel_filepath + ) + kernel_source = ( + kernel_source if kernel_source.exists() else Path(input_filepath).parent.parent / kernel_filepath ) assert kernel_source.exists(), f"KernelFile '{kernel_source}' does not exist at {kernel_source.resolve()}" language: str = kernelspec["Language"] problem_size = kernelspec["ProblemSize"] - device = kernelspec["Device"]["Name"] - strategy = inputs["Search"]["Name"] - + if device is None: + device = kernelspec["Device"]["Name"] + if strategy is None: + strategy = inputs["Search"]["Name"] + if "Attributes" in inputs["Search"]: + for attribute in inputs["Search"]["Attributes"]: + strategy_options[attribute["Name"]] = attribute["Value"] + if "Budget" in inputs: + budget = inputs["Budget"][0] + if budget["Type"] == "ConfigurationCount": + strategy_options["max_fevals"] = budget["BudgetValue"] + elif budget["Type"] == "TuningDuration": + strategy_options["time_limit"] = budget["BudgetValue"] # both are in seconds + else: + raise NotImplementedError(f"Budget type in {budget} is not supported") + + # check if the strategy is a path + if "custom_search_method_path" in strategy_options: + # if it is a path, import the strategy from the file + opt_path: Path = Path(strategy_options["custom_search_method_path"]) + class_name: str = strategy + assert opt_path.exists(), f"Custom search method path '{opt_path}' does not exist relative to current working directory {Path.cwd()}" + optimizer_class = import_class_from_file(opt_path, class_name) + filter_keys = ["custom_search_method_path", "max_fevals", "time_limit", "constraint_aware"] + 
adjusted_strategy_options = {k:v for k, v in strategy_options.items() if k not in filter_keys} + optimizer_instance = optimizer_class(**adjusted_strategy_options) + strategy = OptAlgWrapper(optimizer_instance) + if "constraint_aware" not in strategy_options and hasattr(optimizer_instance, "constraint_aware"): + # if the optimizer has a constraint_aware attribute, set it in the strategy options + strategy_options["constraint_aware"] = optimizer_instance.constraint_aware + + # set the cache path if cache_filepath is None and "SimulationInput" in kernelspec: cache_filepath = Path(kernelspec["SimulationInput"]) @@ -892,10 +930,12 @@ def tune_kernel_T1(input_filepath: Path, cache_filepath: Path = None, simulation tune_param = None if param["Type"] in ["int", "float"]: vals = param["Values"] - if vals[:5] == "list(" or (vals[0] == "[" and vals[-1] == "]"): + if "list(" in vals or "range(" in vals or (vals[0] == "[" and vals[-1] == "]"): tune_param = eval(vals) else: tune_param = literal_eval(vals) + if param["Type"] == "string": + tune_param = eval(param["Values"]) if tune_param is not None: tune_params[param["Name"]] = tune_param else: @@ -917,7 +957,11 @@ def tune_kernel_T1(input_filepath: Path, cache_filepath: Path = None, simulation cmem_arguments = {} for arg in kernelspec["Arguments"]: argument = None - if arg["Type"] == "float" and arg["MemoryType"] == "Vector": + if arg["MemoryType"] == "Vector": + if arg["Type"] != "float": + raise NotImplementedError( + f"Conversion for vector type '{arg['Type']}' has not yet been implemented: {arg}" + ) size = arg["Size"] if isinstance(size, str): args = tune_params.copy() @@ -931,6 +975,13 @@ def tune_kernel_T1(input_filepath: Path, cache_filepath: Path = None, simulation argument = numpy.random.randn(size).astype(numpy.float32) else: raise NotImplementedError(f"Conversion for fill type '{arg['FillType']}' has not yet been implemented") + elif arg["MemoryType"] == "Scalar": + if arg["Type"] == "float": + argument = 
numpy.float32(arg["FillValue"]) + elif arg["Type"] == "int32": + argument = numpy.int32(arg["FillValue"]) + else: + raise NotImplementedError() if argument is not None: arguments.append(argument) if "MemType" in arg and arg["MemType"] == "Constant": @@ -939,7 +990,6 @@ def tune_kernel_T1(input_filepath: Path, cache_filepath: Path = None, simulation raise NotImplementedError(f"Conversion for this type of argument has not yet been implemented: {arg}") # tune with the converted inputs - # TODO add objective to tune_kernel and get_t4_results calls once available in T1 results, env = tune_kernel( kernel_name, kernel_source, @@ -959,10 +1009,12 @@ def tune_kernel_T1(input_filepath: Path, cache_filepath: Path = None, simulation verbose=False, iterations=iterations, strategy=strategy, - strategy_options=strategy_options + strategy_options=strategy_options, + objective=objective, + objective_higher_is_better=objective_higher_is_better, ) if output_T4: - return get_t4_metadata(), get_t4_results(results, tune_params) + return get_t4_metadata(), get_t4_results(results, tune_params, objective=objective) return results, env diff --git a/kernel_tuner/runners/simulation.py b/kernel_tuner/runners/simulation.py index 22c7c667c..3b162dc1b 100644 --- a/kernel_tuner/runners/simulation.py +++ b/kernel_tuner/runners/simulation.py @@ -2,6 +2,7 @@ import logging from collections import namedtuple from time import perf_counter +from warnings import warn from kernel_tuner import util from kernel_tuner.runners.runner import Runner @@ -82,7 +83,7 @@ def run(self, parameter_space, tuning_options): results = [] - # iterate over parameter space + # iterate over parameter space for element in parameter_space: # check if element is in the cache @@ -127,8 +128,26 @@ def run(self, parameter_space, tuning_options): results.append(result) continue - # if the element is not in the cache, raise an error - check = util.check_restrictions(tuning_options.restrictions, 
dict(zip(tuning_options['tune_params'].keys(), element)), True)
+            # if the configuration is not in the cache and not within restrictions, simulate an InvalidConfig with warning
+            params_dict = dict(zip(tuning_options['tune_params'].keys(), element))
+            check = util.check_restrictions(tuning_options.restrictions, params_dict, True)
+            if not check:
+                result = params_dict
+                result['compile_time'] = 0
+                result['verification_time'] = 0
+                result['benchmark_time'] = 0
+                result['strategy_time'] = self.last_strategy_time
+
+                total_time = 1000 * (perf_counter() - self.start_time)
+                self.start_time = perf_counter()
+                result['framework_time'] = total_time - self.last_strategy_time
+
+                result[tuning_options.objective] = util.InvalidConfig()
+                results.append(result)
+                warn(f"Configuration {element} not in cache, does not pass restrictions. Will be treated as an InvalidConfig, but make sure you are evaluating the correct cache file.")
+                continue
+
+            # if the configuration is not in the cache and passes restrictions, raise a ValueError
             err_string = f"kernel configuration {element} not in cache, does {'' if check else 'not '}pass extra restriction check ({check})"
             logging.debug(err_string)
             raise ValueError(f"{err_string} - in simulation mode, all configurations must be present in the cache")
diff --git a/kernel_tuner/schema/T1/1.0.0/input-schema.json b/kernel_tuner/schema/T1/1.0.0/input-schema.json
index bb53ee594..598a4b3d1 100644
--- a/kernel_tuner/schema/T1/1.0.0/input-schema.json
+++ b/kernel_tuner/schema/T1/1.0.0/input-schema.json
@@ -189,7 +189,8 @@
         "enum": [
           "OpenCL",
           "CUDA",
-          "Vulkan"
+          "Vulkan",
+          "Hypertuner"
         ]
       },
       "CompilerOptions": {
diff --git a/kernel_tuner/schema/T4/1.0.0/results-schema.json b/kernel_tuner/schema/T4/1.0.0/results-schema.json
index 298f2662c..511042016 100644
--- a/kernel_tuner/schema/T4/1.0.0/results-schema.json
+++ b/kernel_tuner/schema/T4/1.0.0/results-schema.json
@@ -59,7 +59,11 @@
           "type": "string"
         },
         "value": {
-          "type": "number"
+          "type": [
"number", + "string", + "array" + ] }, "unit": { "type": "string" diff --git a/kernel_tuner/searchspace.py b/kernel_tuner/searchspace.py index d9b310f4e..d3d00052f 100644 --- a/kernel_tuner/searchspace.py +++ b/kernel_tuner/searchspace.py @@ -1,10 +1,15 @@ import ast +import numbers import re from pathlib import Path -from random import choice, shuffle -from typing import List +from random import choice, shuffle, randint +from typing import List, Union +from warnings import warn +from copy import deepcopy +from collections import defaultdict, deque import numpy as np +from scipy.stats.qmc import LatinHypercube from constraint import ( BacktrackingSolver, Constraint, @@ -18,6 +23,14 @@ Solver, ) +try: + import torch + from torch import Tensor + + torch_available = True +except ImportError: + torch_available = False + from kernel_tuner.util import check_restrictions as check_instance_restrictions from kernel_tuner.util import ( compile_restrictions, @@ -26,7 +39,7 @@ get_interval, ) -supported_neighbor_methods = ["strictly-adjacent", "adjacent", "Hamming"] +supported_neighbor_methods = ["strictly-adjacent", "adjacent", "Hamming", "closest-param-indices"] class Searchspace: @@ -38,8 +51,10 @@ def __init__( restrictions, max_threads: int, block_size_names=default_block_size_names, + defer_construction=False, build_neighbors_index=False, neighbor_method=None, + from_cache: dict = None, framework="PythonConstraint", solver_method="PC_OptimizedBacktrackingSolver", path_to_ATF_cache: Path = None, @@ -51,20 +66,58 @@ def __init__( adjacent: picks closest parameter value in both directions for each parameter Hamming: any parameter config with 1 different parameter value is a neighbor Optionally sort the searchspace by the order in which the parameter values were specified. By default, sort goes from first to last parameter, to reverse this use sort_last_param_first. 
+        Optionally an imported cache can be used instead with `from_cache`, in which case the `tune_params`, `restrictions` and `max_threads` arguments can be set to None, and construction is skipped.
+        Optionally construction can be deferred to a later time by setting `defer_construction` to True, in which case the searchspace is not built on instantiation (experimental).
         """
+        # check the arguments
+        if from_cache is not None:
+            assert (
+                tune_params is None and restrictions is None and max_threads is None
+            ), "When `from_cache` is used, the positional arguments must be set to None."
+            tune_params = from_cache["tune_params"]
+        if from_cache is None:
+            assert tune_params is not None and max_threads is not None, "Must specify positional arguments."
+
         # set the object attributes using the arguments
         framework_l = framework.lower()
         restrictions = restrictions if restrictions is not None else []
         self.tune_params = tune_params
-        self.restrictions = restrictions
+        self.original_tune_params = tune_params.copy() if hasattr(tune_params, "copy") else tune_params
+        self.max_threads = max_threads
+        self.block_size_names = block_size_names
+        self._tensorspace = None
+        self.tensor_dtype = torch.float32 if torch_available else None
+        self.tensor_device = torch.device("cpu") if torch_available else None
+        self.tensor_kwargs = dict(dtype=self.tensor_dtype, device=self.tensor_device)
+        self._tensorspace_bounds = None
+        self._tensorspace_bounds_indices = []
+        self._tensorspace_categorical_dimensions = []
+        self._tensorspace_param_config_structure = []
+        self._map_tensor_to_param = {}
+        self._map_param_to_tensor = {}
+        restrictions = [restrictions] if not isinstance(restrictions, (list, tuple)) else restrictions
+        self.restrictions = deepcopy(restrictions)
+        self.original_restrictions = deepcopy(restrictions)   # keep the original restrictions, so that the searchspace can be modified later
         # the searchspace can add commonly used constraints (e.g.
maxprod(blocks) <= maxthreads) - self._modified_restrictions = restrictions + self._modified_restrictions = deepcopy(restrictions) self.param_names = list(self.tune_params.keys()) self.params_values = tuple(tuple(param_vals) for param_vals in self.tune_params.values()) self.params_values_indices = None self.build_neighbors_index = build_neighbors_index self.solver_method = solver_method + self.tune_param_is_numeric = { param_name: all(isinstance(val, (int, float)) for val in param_values) and not any(isinstance(val, bool) for val in param_values) for (param_name, param_values) in tune_params.items() } + self.tune_param_is_numeric_mask = np.array(list(self.tune_param_is_numeric.values()), dtype=bool) + self.__numpy_types = [np.array(vals).dtype for vals in self.params_values] + self.__tune_params_to_index_lookup = None + self.__tune_params_from_index_lookup = None + self.__list_param_indices = None + self.__list_param_indices_lower_bounds = None + self.__list_param_indices_upper_bounds = None + self.__list_numpy_numeric = None + self.__true_tune_params = None self.__neighbor_cache = { method: dict() for method in supported_neighbor_methods } + self.__neighbor_partial_cache = { method: defaultdict(list) for method in supported_neighbor_methods } + self.neighbors_index = dict() self.neighbor_method = neighbor_method if (neighbor_method is not None or build_neighbors_index) and neighbor_method not in supported_neighbor_methods: raise ValueError(f"Neighbor method is {neighbor_method}, must be one of {supported_neighbor_methods}") @@ -73,7 +126,12 @@ def __init__( restrictions = [restrictions] if not isinstance(restrictions, list) else restrictions if ( len(restrictions) > 0 - and any(isinstance(restriction, str) for restriction in restrictions) + and ( + any(isinstance(restriction, str) for restriction in restrictions) + or any( + isinstance(restriction[0], str) for restriction in restrictions if isinstance(restriction, tuple) + ) + ) and not ( framework_l == "pysmt" or 
framework_l == "bruteforce" or framework_l == "pythonconstraint" or solver_method.lower() == "pc_parallelsolver" ) @@ -85,46 +143,57 @@ def __init__( format=framework_l if framework_l == "pyatf" else None, ) - # get the framework given the framework argument - if framework_l == "pythonconstraint": - searchspace_builder = self.__build_searchspace - elif framework_l == "pysmt": - searchspace_builder = self.__build_searchspace_pysmt - elif framework_l == "pyatf": - searchspace_builder = self.__build_searchspace_pyATF - elif framework_l == "atf_cache": - searchspace_builder = self.__build_searchspace_ATF_cache - self.path_to_ATF_cache = path_to_ATF_cache - elif framework_l == "bruteforce": - searchspace_builder = self.__build_searchspace_bruteforce - else: - raise ValueError(f"Invalid framework parameter '{framework}'") - - # get the solver given the solver method argument - solver = "" - if solver_method.lower() == "pc_backtrackingsolver": - solver = BacktrackingSolver() - elif solver_method.lower() == "pc_optimizedbacktrackingsolver": - solver = OptimizedBacktrackingSolver(forwardcheck=False) - elif solver_method.lower() == "pc_parallelsolver": - raise NotImplementedError("ParallelSolver is not yet implemented") - # solver = ParallelSolver() - elif solver_method.lower() == "pc_recursivebacktrackingsolver": - solver = RecursiveBacktrackingSolver() - elif solver_method.lower() == "pc_minconflictssolver": - solver = MinConflictsSolver() + # if an imported cache, skip building and set the values directly + if from_cache is not None: + configs = dict(from_cache["cache"]).values() + self.list = list(tuple([v for p, v in c.items() if p in self.tune_params]) for c in configs) + self.size = len(self.list) + self.__dict = dict(zip(self.list, range(self.size))) else: - raise ValueError(f"Solver method {solver_method} not recognized.") - - # build the search space - self.list, self.__dict, self.size = searchspace_builder(block_size_names, max_threads, solver) - self.__numpy = 
None - self.num_params = len(self.tune_params) - self.indices = np.arange(self.size) - if neighbor_method is not None and neighbor_method != "Hamming": - self.__prepare_neighbors_index() - if build_neighbors_index: - self.neighbors_index = self.__build_neighbors_index(neighbor_method) + # get the framework given the framework argument + if framework_l == "pythonconstraint": + searchspace_builder = self.__build_searchspace + elif framework_l == "pysmt": + searchspace_builder = self.__build_searchspace_pysmt + elif framework_l == "pyatf": + searchspace_builder = self.__build_searchspace_pyATF + elif framework_l == "atf_cache": + searchspace_builder = self.__build_searchspace_ATF_cache + self.path_to_ATF_cache = path_to_ATF_cache + elif framework_l == "bruteforce": + searchspace_builder = self.__build_searchspace_bruteforce + else: + raise ValueError(f"Invalid framework parameter {framework}") + + # get the solver given the solver method argument + solver = "" + if solver_method.lower() == "pc_backtrackingsolver": + solver = BacktrackingSolver() + elif solver_method.lower() == "pc_optimizedbacktrackingsolver": + solver = OptimizedBacktrackingSolver(forwardcheck=False) + elif solver_method.lower() == "pc_parallelsolver": + raise NotImplementedError("ParallelSolver is not yet implemented") + # solver = ParallelSolver() + elif solver_method.lower() == "pc_recursivebacktrackingsolver": + solver = RecursiveBacktrackingSolver() + elif solver_method.lower() == "pc_minconflictssolver": + solver = MinConflictsSolver() + else: + raise ValueError(f"Solver method {solver_method} not recognized.") + + if not defer_construction: + # build the search space + self.list, self.__dict, self.size = searchspace_builder(block_size_names, max_threads, solver) + + # finalize construction + if not defer_construction: + self.__numpy = None + self.num_params = len(self.tune_params) + self.indices = np.arange(self.size) + if neighbor_method is not None and neighbor_method != "Hamming": + 
self.__prepare_neighbors_index() + if build_neighbors_index: + self.neighbors_index[neighbor_method] = self.__build_neighbors_index(neighbor_method) # def __build_searchspace_ortools(self, block_size_names: list, max_threads: int) -> Tuple[List[tuple], np.ndarray, dict, int]: # # Based on https://developers.google.com/optimization/cp/cp_solver#python_2 @@ -191,6 +260,7 @@ def __build_searchspace_bruteforce(self, block_size_names: list, max_threads: in isinstance(self._modified_restrictions, list) and block_size_restriction_spaced not in self._modified_restrictions ): + print(f"added default block size restriction '{block_size_restriction_spaced}'") self._modified_restrictions.append(block_size_restriction_spaced) if isinstance(self.restrictions, list): self.restrictions.append(block_size_restriction_spaced) @@ -271,14 +341,15 @@ def all_smt(formula, keys) -> list: return self.__parameter_space_list_to_lookup_and_return_type(parameter_space_list) - def __build_searchspace_pyATF(self, block_size_names: list, max_threads: int, solver: Solver): - """Builds the searchspace using pyATF.""" - from pyatf import TP, Interval, Set, Tuner - from pyatf.cost_functions.generic import CostFunction - from pyatf.search_techniques import Exhaustive + def get_tune_params_pyatf(self, block_size_names: list = None, max_threads: int = None): + """Convert the tune_params and restrictions to pyATF tunable parameters.""" + from pyatf import TP, Interval, Set - # Define a bogus cost function - costfunc = CostFunction(":") # bash no-op + # if block_size_names or max_threads are not specified, use the defaults + if block_size_names is None: + block_size_names = self.block_size_names + if max_threads is None: + max_threads = self.max_threads # add the Kernel Tuner default blocksize threads restrictions assert isinstance(self.restrictions, list) @@ -310,28 +381,41 @@ def __build_searchspace_pyATF(self, block_size_names: list, max_threads: int, so registered_restrictions.append(index) # define 
the Tunable Parameters - def get_params(): - params = list() - for index, (key, values) in enumerate(self.tune_params.items()): - vi = get_interval(values) - vals = ( - Interval(vi[0], vi[1], vi[2]) if vi is not None and vi[2] != 0 else Set(*np.array(values).flatten()) - ) - constraint = res_dict.get(key, None) - constraint_source = None - if constraint is not None: - constraint, constraint_source = constraint - # in case of a leftover monolithic restriction, append at the last parameter - if index == len(self.tune_params) - 1 and len(res_dict) == 0 and len(self.restrictions) == 1: - res, params, source = self.restrictions[0] - assert callable(res) - constraint = res - params.append(TP(key, vals, constraint, constraint_source)) - return params + params = list() + for index, (key, values) in enumerate(self.tune_params.items()): + vi = get_interval(values) + vals = ( + Interval(vi[0], vi[1], vi[2]) if vi is not None and vi[2] != 0 else Set(*np.array(values).flatten()) + ) + assert vals is not None, f"Values for parameter {key} are None, this should not happen." 
+            constraint = res_dict.get(key, None) +            constraint_source = None +            if constraint is not None: +                constraint, constraint_source = constraint +            # in case of a leftover monolithic restriction, append at the last parameter +            if index == len(self.tune_params) - 1 and len(res_dict) == 0 and len(self.restrictions) == 1: +                res, res_params, source = self.restrictions[0] +                assert callable(res) +                constraint = res +            params.append(TP(key, vals, constraint, constraint_source)) +        return params + + +    def __build_searchspace_pyATF(self, block_size_names: list, max_threads: int, solver: Solver): +        """Builds the searchspace using pyATF.""" +        from pyatf import Tuner +        from pyatf.cost_functions.generic import CostFunction +        from pyatf.search_techniques import Exhaustive + +        # Define a bogus cost function +        costfunc = CostFunction(":") # bash no-op + +        # set data +        self.tune_params_pyatf = self.get_tune_params_pyatf(block_size_names, max_threads)          # tune         _, _, tuning_data = ( -            Tuner().verbosity(0).tuning_parameters(*get_params()).search_technique(Exhaustive()).tune(costfunc) +            Tuner().verbosity(0).tuning_parameters(*self.tune_params_pyatf).search_technique(Exhaustive()).tune(costfunc)         )          # transform the result into a list of parameter configurations for validation @@ -386,7 +470,8 @@ def __build_searchspace(self, block_size_names: list, max_threads: int, solver:          # add the user-specified restrictions as constraints on the parameter space         if not isinstance(self.restrictions, (list, tuple)):             self.restrictions = [self.restrictions] -        self.restrictions = convert_constraint_lambdas(self.restrictions) +        if any(not isinstance(restriction, (Constraint, FunctionConstraint, str)) for restriction in self.restrictions): +            self.restrictions = convert_constraint_lambdas(self.restrictions)          parameter_space = self.__add_restrictions(parameter_space)          # add the default blocksize threads restrictions last, because it is unlikely to reduce the parameter space by much @@ -409,8 +494,9 @@ def 
__build_searchspace(self, block_size_names: list, max_threads: int, solver: def __add_restrictions(self, parameter_space: Problem) -> Problem: """Add the user-specified restrictions as constraints on the parameter space.""" - if isinstance(self.restrictions, list): - for restriction in self.restrictions: + restrictions = deepcopy(self.restrictions) + if isinstance(restrictions, list): + for restriction in restrictions: required_params = self.param_names # (un)wrap where necessary @@ -437,17 +523,17 @@ def __add_restrictions(self, parameter_space: Problem) -> Problem: else: parameter_space.addConstraint(restriction, variables) else: - raise ValueError(f"Unrecognized restriction {restriction}") + raise ValueError(f"Unrecognized restriction type {type(restriction)} ({restriction})") # if the restrictions are the old monolithic function, apply them directly (only for backwards compatibility, likely slower than well-specified constraints!) - elif callable(self.restrictions): + elif callable(restrictions): def restrictions_wrapper(*args): - return check_instance_restrictions(self.restrictions, dict(zip(self.param_names, args)), False) + return check_instance_restrictions(restrictions, dict(zip(self.param_names, args)), False) parameter_space.addConstraint(FunctionConstraint(restrictions_wrapper), self.param_names) - elif self.restrictions is not None: - raise ValueError(f"The restrictions are of unsupported type {type(self.restrictions)}") + elif restrictions is not None: + raise ValueError(f"The restrictions are of unsupported type {type(restrictions)}") return parameter_space def __parse_restrictions_pysmt(self, restrictions: list, tune_params: dict, symbols: dict): @@ -600,7 +686,10 @@ def get_list_dict(self) -> dict: return self.__dict def get_list_numpy(self) -> np.ndarray: - """Get the parameter space list as a NumPy array. Initializes the NumPy array if not yet done. + """Get the parameter space list as a NumPy array of tuples with mixed types. 
+ + Rarely faster or more convenient than `get_list_param_indices_numpy` or `get_list_numpy_numeric`. + Initializes the NumPy array if not yet done. Returns: the NumPy array. @@ -609,53 +698,399 @@ def get_list_numpy(self) -> np.ndarray: # create a numpy array of the search space # in order to have the tuples as tuples in numpy, the types are set with a string, but this will make the type np.void # type_string = ",".join(list(type(param).__name__ for param in parameter_space_list[0])) - self.__numpy = np.array(self.list) + types = np.dtype([(param_name, self.__numpy_types[index]) for index, param_name in enumerate(self.param_names)]) + self.__numpy = np.array(self.list, dtype=types) + assert self.__numpy.shape[0] == self.size, f"Expected shape {(self.size,)}, got {self.__numpy.shape}" + assert len(self.__numpy[0]) == self.num_params, f"Expected tuples to be of length {len(self.__numpy[0])}, got {len(self.__numpy[0])}" + # return the numpy array return self.__numpy + def get_list_param_indices_numpy(self) -> np.ndarray: + """Get the parameter space list as a 2D NumPy array of parameter value indices. + + Same as mapping `get_param_indices` over the searchspace, but faster. + Assumes that the parameter configs have the same order as `tune_params`. + + Returns: + the NumPy array. 
+ """ + if self.__list_param_indices is None: + tune_params_to_index_lookup = list() + tune_params_from_index_lookup = list() + for param_name, param_values in self.tune_params.items(): + tune_params_to_index_lookup.append({ value: index for index, value in enumerate(param_values) }) + tune_params_from_index_lookup.append({ index: value for index, value in enumerate(param_values) }) + + # build the list + list_param_indices = list() + for param_config in self.list: + list_param_indices.append([tune_params_to_index_lookup[index][val] for index, val in enumerate(param_config)]) + + # register the computed results + self.__tune_params_to_index_lookup = tune_params_to_index_lookup + self.__tune_params_from_index_lookup = tune_params_from_index_lookup + self.__list_param_indices = np.array(list_param_indices) + assert self.__list_param_indices.shape == (self.size, self.num_params), f"Expected shape {(self.size, self.num_params)}, got {self.__list_param_indices.shape}" + + # calculate the actual minimum and maximum index for each parameter after restrictions + self.__list_param_indices_lower_bounds = np.min(self.__list_param_indices, axis=0) + self.__list_param_indices_upper_bounds = np.max(self.__list_param_indices, axis=0) + + largest_index = np.max(self.__list_param_indices) * 2 # multiplied by two to account for worst-case absolute difference operations later + if largest_index >= 2**31: + # if the largest index is larger than 2**31, use int64 to avoid overflow + self.__list_param_indices = self.__list_param_indices.astype(np.int64) + # else: + # self.__list_param_indices = self.__list_param_indices.astype(np.int32) + # + # the below types do not have a sizable performance benifit currently + elif largest_index >= 2**15: + # if the largest index is larger than 2**15, use int32 to avoid overflow + self.__list_param_indices = self.__list_param_indices.astype(np.int32) + elif largest_index >= 2**7: + # if the largest index is larger than 2**7, use int16 to avoid 
overflow + self.__list_param_indices = self.__list_param_indices.astype(np.int16) + else: + self.__list_param_indices = self.__list_param_indices.astype(np.int8) + return self.__list_param_indices + + def get_param_indices_lower_bounds(self) -> np.ndarray: + """Get the lower bounds of the parameter indices after restrictions.""" + if self.__list_param_indices_lower_bounds is None: + self.get_list_param_indices_numpy() + return self.__list_param_indices_lower_bounds + + def get_param_indices_upper_bounds(self) -> np.ndarray: + """Get the upper bounds of the parameter indices after restrictions.""" + if self.__list_param_indices_upper_bounds is None: + self.get_list_param_indices_numpy() + return self.__list_param_indices_upper_bounds + + def get_list_param_indices_numpy_min(self): + """Get the minimum possible value in the numpy list of parameter indices.""" + return np.iinfo(self.get_list_param_indices_numpy().dtype).min + + def get_list_param_indices_numpy_max(self): + """Get the maximum possible value in the numpy list of parameter indices.""" + return np.iinfo(self.get_list_param_indices_numpy().dtype).max + + def get_list_numpy_numeric(self) -> np.ndarray: + """Get the parameter space list as a 2D NumPy array of numeric values. + + This is a view of the NumPy array returned by `get_list_numpy`, but with only numeric values. + If the searchspace contains non-numeric values, their index will be used instead. + + Returns: + the NumPy array. 
+ """ + if self.__list_numpy_numeric is None: + # self.__list_numpy_numeric = np.where(self.tune_param_is_numeric_mask, self.get_list_numpy(), self.get_list_param_indices_numpy()) + list_numpy_numeric = list() + for index, (param_name, is_numeric) in enumerate(self.tune_param_is_numeric.items()): + list_numpy_numeric.append(self.get_list_numpy()[param_name] if is_numeric else self.get_list_param_indices_numpy()[:, index]) + self.__list_numpy_numeric = np.array(list_numpy_numeric).transpose() + assert self.__list_numpy_numeric.shape == (self.size, self.num_params), f"Expected shape {(self.size, self.num_params)}, got {self.__list_numpy_numeric.shape}" + return self.__list_numpy_numeric + + def get_true_tunable_params(self) -> dict: + """Get the tunable parameters that are actually tunable, i.e. not constant after restrictions.""" + if self.__true_tune_params is None: + true_tune_params = dict() + numpy_list = self.get_list_param_indices_numpy() + for param_index, (param_name, param_values) in enumerate(self.tune_params.items()): + if len(param_values) == 1: + continue # if the parameter is constant, skip it + if not np.all(numpy_list[:, param_index] == numpy_list[0, param_index]): + # if after restrictions there are different values, register the parameter + true_tune_params[param_name] = param_values + self.__true_tune_params = true_tune_params + return self.__true_tune_params + def get_param_indices(self, param_config: tuple) -> tuple: """For each parameter value in the param config, find the index in the tunable parameters.""" - return tuple(self.params_values[index].index(param_value) for index, param_value in enumerate(param_config)) + if self.__tune_params_to_index_lookup is not None: + # if the lookup is already computed, use it + return tuple([self.__tune_params_to_index_lookup[index][param_value] for index, param_value in enumerate(param_config)]) + try: + return tuple(self.params_values[index].index(param_value) for index, param_value in 
enumerate(param_config)) + except ValueError as e: + for index, param_value in enumerate(param_config): + if param_value not in self.params_values[index]: + # if the parameter value is not in the list of values for that parameter, raise an error + raise ValueError( + f"Parameter value {param_value} ({type(param_value)}) is not in the list of values {self.params_values[index]}" + ) from e + + def get_param_config_from_param_indices(self, param_indices: tuple) -> tuple: + """Get the parameter configuration from the given parameter indices.""" + if self.__tune_params_from_index_lookup is not None: + # if the lookup is already computed, use it + return tuple([self.__tune_params_from_index_lookup[index][param_index] for index, param_index in enumerate(param_indices)]) + return tuple(self.params_values[index][param_index] for index, param_index in enumerate(param_indices)) + + def get_param_config_from_numeric(self, param_config: tuple) -> tuple: + """Get the actual parameter configuration values from a numeric representation of the parameter configuration as in `get_list_numpy_numeric`.""" + if np.all(self.tune_param_is_numeric_mask): + return param_config # if all parameters are numeric, return the input as is + if self.__tune_params_from_index_lookup is None: + # if the lookup is not yet computed, compute it + self.get_list_param_indices_numpy() + if isinstance(param_config, np.ndarray): + param_config = tuple(param_config.tolist()) # if the input is a numpy array, convert it to a tuple + return tuple([val if self.tune_param_is_numeric_mask[index] else self.__tune_params_from_index_lookup[index][val] for index, val in enumerate(param_config)]) def get_param_configs_at_indices(self, indices: List[int]) -> List[tuple]: """Get the param configs at the given indices.""" # map(get) is ~40% faster than numpy[indices] (average based on six searchspaces with 10000, 100000 and 1000000 configs and 10 or 100 random indices) return list(map(self.list.__getitem__, indices)) - def 
get_param_config_index(self, param_config: tuple): + def get_param_config_index(self, param_config: Union[tuple, any]): """Lookup the index for a parameter configuration, returns None if not found.""" + if torch_available and isinstance(param_config, Tensor): + param_config = self.tensor_to_param_config(param_config) # constant time O(1) access - much faster than any other method, but needs a shadow dict of the search space return self.__dict.get(param_config, None) + def initialize_tensorspace(self, dtype=None, device=None): + """Encode the searchspace in a Tensor. Save the mapping. Call this function directly to control the precision or device used.""" + assert self._tensorspace is None, "Tensorspace is already initialized" + skipped_count = 0 + bounds = [] + if dtype is not None: + self.tensor_dtype = dtype + if device is not None: + self.tensor_device = device + self.tensor_kwargs = dict(dtype=self.tensor_dtype, device=self.tensor_device) + + # generate the mappings to and from tensor values + for index, param_values in enumerate(self.params_values): + # filter out parameters that do not matter, more efficient and avoids bounds problem + if len(param_values) < 2 or all(p == param_values[0] for p in param_values): + # keep track of skipped parameters, add them back in conversion functions + self._tensorspace_param_config_structure.append(param_values[0]) + skipped_count += 1 + continue + else: + self._tensorspace_param_config_structure.append(None) + + # convert numericals to float, or encode categorical + if all(isinstance(v, numbers.Real) for v in param_values): + tensor_values = torch.tensor(param_values, dtype=self.tensor_dtype) + else: + self._tensorspace_categorical_dimensions.append(index - skipped_count) + # tensor_values = np.arange(len(param_values)) + tensor_values = torch.arange(len(param_values), dtype=self.tensor_dtype) + + # write the mappings to the object + self._map_param_to_tensor[index] = dict(zip(param_values, tensor_values.tolist())) + 
self._map_tensor_to_param[index] = dict(zip(tensor_values.tolist(), param_values)) + bounds.append((tensor_values.min(), tensor_values.max())) + if tensor_values.min() < tensor_values.max(): + self._tensorspace_bounds_indices.append(index - skipped_count) + + # do some checks + assert len(self.params_values) == len(self._tensorspace_param_config_structure) + assert len(self._map_param_to_tensor) == len(self._map_tensor_to_param) == len(bounds) + assert len(self._tensorspace_bounds_indices) <= len(bounds) + + # apply the mappings on the full searchspace + # numpy_repr = self.get_list_numpy() + # numpy_repr = np.apply_along_axis(self.param_config_to_tensor, 1, numpy_repr) + # self._tensorspace = torch.from_numpy(numpy_repr.astype(self.tensor_dtype)).to(self.tensor_device) + self._tensorspace = torch.stack(tuple(map(self.param_config_to_tensor, self.list))) + + # set the bounds in the correct format (one array for the min, one for the max) + bounds = torch.tensor(bounds, **self.tensor_kwargs) + self._tensorspace_bounds = torch.cat([bounds[:, 0], bounds[:, 1]]).reshape((2, bounds.shape[0])) + + def has_tensorspace(self) -> bool: + """Check if the tensorspace has been initialized.""" + return self._tensorspace is not None + + def get_tensorspace(self): + """Get the searchspace encoded in a Tensor. 
To use a non-default dtype or device, call `initialize_tensorspace` first.""" + if not self.has_tensorspace(): + self.initialize_tensorspace() + return self._tensorspace + + def get_tensorspace_categorical_dimensions(self): + """Get the a list of the categorical dimensions in the tensorspace.""" + return self._tensorspace_categorical_dimensions + + def param_config_to_tensor(self, param_config: tuple): + """Convert from a parameter configuration to a Tensor.""" + if len(self._map_param_to_tensor) == 0: + self.initialize_tensorspace() + array = [] + for i, param in enumerate(param_config): + if self._tensorspace_param_config_structure[i] is not None: + continue # skip over parameters not in the tensorspace + mapping = self._map_param_to_tensor[i] + conversions = [None, str, float, int, bool] + for c in conversions: + try: + c_param = param if c is None else c(param) + array.append(mapping[c_param]) + break + except (KeyError, ValueError) as e: + if c == conversions[-1]: + raise KeyError(f"No variant of {param} could be found in {mapping}") from e + return torch.tensor(array, **self.tensor_kwargs) + + def tensor_to_param_config(self, tensor): + """Convert from a Tensor to a parameter configuration.""" + assert tensor.dim() == 1, f"Parameter configuration tensor must be 1-dimensional, is {tensor.dim()} ({tensor})" + if len(self._map_tensor_to_param) == 0: + self.initialize_tensorspace() + config = self._tensorspace_param_config_structure.copy() + skip_counter = 0 + for i, param in enumerate(config): + if param is not None: + skip_counter += 1 + else: + value = tensor[i - skip_counter].item() + config[i] = self._map_tensor_to_param[i][value] + return tuple(config) + + def get_tensorspace_bounds(self): + """Get the bounds to the tensorspace parameters, returned as a 2 x d dimensional tensor, and the indices of the parameters.""" + if not self.has_tensorspace(): + self.initialize_tensorspace() + return self._tensorspace_bounds, self._tensorspace_bounds_indices + def 
__prepare_neighbors_index(self):         """Prepare by calculating the indices for the individual parameters.""" -        self.params_values_indices = np.array(list(self.get_param_indices(param_config) for param_config in self.list)) +        if self.params_values_indices is None: +            self.params_values_indices = self.get_list_param_indices_numpy() + +    def __get_neighbor_indices_closest_param_indices(self, param_config: tuple, param_index: int = None, return_one=False) -> List[int]: +        """Get the neighbors closest in parameter indices difference from the parameter configuration. Always returns at least 1 neighbor.""" +        param_indices = self.get_param_indices(param_config) + +        # get the indices of the parameter values +        if self.params_values_indices is None: +            self.__prepare_neighbors_index() + +        # calculate the absolute difference between the parameter value indices +        abs_index_difference = np.abs(self.params_values_indices - np.array(param_indices), dtype=self.params_values_indices.dtype) +        # calculate the sum of the absolute differences for each parameter configuration +        sum_of_index_differences = np.sum(abs_index_difference, axis=1) +        if param_index is not None: +            # set the sum of index differences to infinity for the parameter index to avoid returning the same parameter configuration +            sum_of_index_differences[param_index] = self.get_list_param_indices_numpy_max() +        if return_one: +            # if return_one is True, return the index of the closest parameter configuration (faster than finding all) +            return [np.argmin(sum_of_index_differences)] +        else: +            # find the param config indices where the difference is the smallest +            min_difference = np.min(sum_of_index_differences) +            matching_indices = (sum_of_index_differences == min_difference).nonzero()[0] +        return matching_indices      def __get_neighbors_indices_hamming(self, param_config: tuple) -> List[int]: -        """Get the neighbors using Hamming distance from the parameter configuration.""" -        num_matching_params = 
np.count_nonzero(self.get_list_numpy() == param_config, -1) + """Get the neighbors at 1 Hamming distance from the parameter configuration.""" + param_indices = self.get_param_indices(param_config) + num_matching_params = np.count_nonzero(self.get_list_param_indices_numpy() == param_indices, -1) matching_indices = (num_matching_params == self.num_params - 1).nonzero()[0] return matching_indices + def __get_random_neighbor_hamming(self, param_config: tuple) -> tuple: + """Get a random neighbor at 1 Hamming distance from the parameter configuration.""" + arr = self.get_list_param_indices_numpy() + target = np.array(self.get_param_indices(param_config)) + assert arr[0].shape == target.shape + + # find the first row that differs from the target in exactly one column, return as soon as one is found + random_order_indices = np.random.permutation(arr.shape[0]) + for i in random_order_indices: + # assert arr[i].shape == target.shape, f"Row {i} shape {arr[i].shape} does not match target shape {target.shape}" + if np.count_nonzero(arr[i] != target) == 1: + self.__add_to_neighbor_partial_cache(param_config, [i], "Hamming", full_neighbors=False) + return self.get_param_configs_at_indices([i])[0] + return None + + def __get_random_neighbor_adjacent(self, param_config: tuple) -> tuple: + """Get an approximately random adjacent neighbor of the parameter configuration.""" + # NOTE: this is not truly random as we only progressively increase the allowed index difference if no neighbors are found, but much faster than generating all neighbors + + # get the indices of the parameter values + if self.params_values_indices is None: + self.__prepare_neighbors_index() + param_config_index = self.get_param_config_index(param_config) + param_config_value_indices = ( + self.get_param_indices(param_config) + if param_config_index is None + else self.params_values_indices[param_config_index] + ) + max_index_difference_per_param = [max(len(self.params_values[p]) - 1 - i, i) for p, i in 
enumerate(param_config_value_indices)] + + # calculate the absolute difference between the parameter value indices + abs_index_difference = np.abs(self.params_values_indices - np.array(param_config_value_indices), dtype=self.params_values_indices.dtype) + + # start at an index difference of 1, progressively increase - potentially expensive if there are no neighbors until very late + max_index_difference = max(max_index_difference_per_param) + allowed_index_difference = 1 + allowed_values = [[v] for v in param_config] + while allowed_index_difference <= max_index_difference: + # get the param config indices where the difference is at most allowed_index_difference for each position + matching_indices = list((np.max(abs_index_difference, axis=1) <= allowed_index_difference).nonzero()[0]) + # as the selected param config does not differ anywhere, remove it from the matches + if param_config_index is not None: + matching_indices.remove(param_config_index) + + # if there are matching indices, return a random one + if len(matching_indices) > 0: + self.__add_to_neighbor_partial_cache(param_config, matching_indices, "adjacent", full_neighbors=allowed_index_difference == max_index_difference) + + # get a random index from the matching indices + random_neighbor_index = choice(matching_indices) + return self.get_param_configs_at_indices([random_neighbor_index])[0] + + # if there are no matching indices, increase the allowed index difference and start over + allowed_index_difference += 1 + return None + + def __add_to_neighbor_partial_cache(self, param_config: tuple, neighbor_indices: List[int], neighbor_method: str, full_neighbors = False): + """Add the neighbor indices to the partial cache using the given parameter configuration.""" + param_config_index = self.get_param_config_index(param_config) + if param_config_index is None: + return # we need a valid parameter configuration to add to the cache + # add the indices to the partial cache for the parameter configuration + if 
full_neighbors: + self.__neighbor_partial_cache[neighbor_method][param_config_index] = neighbor_indices + else: + for neighbor_index in neighbor_indices: + if neighbor_index not in self.__neighbor_partial_cache[neighbor_method][param_config_index]: + self.__neighbor_partial_cache[neighbor_method][param_config_index].append(neighbor_index) + # add the parameter configuration index to the partial cache for each neighbor + for neighbor_index in neighbor_indices: + if param_config_index not in self.__neighbor_partial_cache[neighbor_method][neighbor_index]: + self.__neighbor_partial_cache[neighbor_method][neighbor_index].append(param_config_index) + def __get_neighbors_indices_strictlyadjacent( self, param_config_index: int = None, param_config: tuple = None ) -> List[int]: """Get the neighbors using strictly adjacent distance from the parameter configuration (parameter index absolute difference == 1).""" + if self.params_values_indices is None: + self.__prepare_neighbors_index() param_config_value_indices = ( self.get_param_indices(param_config) if param_config_index is None else self.params_values_indices[param_config_index] ) # calculate the absolute difference between the parameter value indices - abs_index_difference = np.abs(self.params_values_indices - param_config_value_indices) + abs_index_difference = np.abs(self.params_values_indices - param_config_value_indices, dtype=self.params_values_indices.dtype) # get the param config indices where the difference is one or less for each position matching_indices = (np.max(abs_index_difference, axis=1) <= 1).nonzero()[0] # as the selected param config does not differ anywhere, remove it from the matches if param_config_index is not None: - matching_indices = np.setdiff1d(matching_indices, [param_config_index], assume_unique=False) + matching_indices = np.setdiff1d(matching_indices, [param_config_index], assume_unique=True) return matching_indices def __get_neighbors_indices_adjacent(self, param_config_index: int = None, 
param_config: tuple = None) -> List[int]: """Get the neighbors using adjacent distance from the parameter configuration (parameter index absolute difference >= 1).""" + if self.params_values_indices is None: + self.__prepare_neighbors_index() param_config_value_indices = ( self.get_param_indices(param_config) if param_config_index is None @@ -666,18 +1101,17 @@ def __get_neighbors_indices_adjacent(self, param_config_index: int = None, param # transpose to get the param indices difference per parameter instead of per param config index_difference_transposed = index_difference.transpose() # for each parameter get the closest upper and lower parameter (absolute index difference >= 1) - # np.PINF has been replaced by 1e12 here, as on some systems np.PINF becomes np.NINF upper_bound = tuple( np.min( index_difference_transposed[p][(index_difference_transposed[p] > 0).nonzero()], - initial=1e12, + initial=self.get_list_param_indices_numpy_max(), ) for p in range(self.num_params) ) lower_bound = tuple( np.max( index_difference_transposed[p][(index_difference_transposed[p] < 0).nonzero()], - initial=-1e12, + initial=self.get_list_param_indices_numpy_min(), ) for p in range(self.num_params) ) @@ -687,7 +1121,7 @@ def __get_neighbors_indices_adjacent(self, param_config_index: int = None, param ) # as the selected param config does not differ anywhere, remove it from the matches if param_config_index is not None: - matching_indices = np.setdiff1d(matching_indices, [param_config_index], assume_unique=False) + matching_indices = np.setdiff1d(matching_indices, [param_config_index], assume_unique=True) return matching_indices def __build_neighbors_index(self, neighbor_method) -> List[List[int]]: @@ -704,12 +1138,16 @@ def __build_neighbors_index(self, neighbor_method) -> List[List[int]]: self.__get_neighbors_indices_strictlyadjacent(param_config_index, param_config) for param_config_index, param_config in enumerate(self.list) ) - if neighbor_method == "adjacent": return list( 
self.__get_neighbors_indices_adjacent(param_config_index, param_config) for param_config_index, param_config in enumerate(self.list) ) + if neighbor_method == "closest-param-indices": + return list( + self.__get_neighbor_indices_closest_param_indices(param_config, param_config_index) + for param_config_index, param_config in enumerate(self.list) + ) raise NotImplementedError(f"The neighbor method {neighbor_method} is not implemented") @@ -723,26 +1161,156 @@ def get_random_sample_indices(self, num_samples: int) -> np.ndarray: def get_random_sample(self, num_samples: int) -> List[tuple]: """Get the parameter configurations for a random, non-conflicting sample (caution: not unique in consecutive calls).""" + if self.size < num_samples: + warn( + f"Too many samples requested ({num_samples}), reducing the number of samples to the searchspace size ({self.size})" + ) + num_samples = self.size return self.get_param_configs_at_indices(self.get_random_sample_indices(num_samples)) - def get_neighbors_indices_no_cache(self, param_config: tuple, neighbor_method=None) -> List[int]: + def get_distributed_random_sample_indices(self, num_samples: int, sampling_factor=10) -> List[int]: + """Get a distributed random sample of parameter configuration indices. 
Note: `get_LHS_random_sample_indices` is likely faster and better distributed.""" +        if num_samples > self.size: +            warn( +                f"Too many samples requested ({num_samples}), reducing the number of samples to half of the searchspace size ({self.size})" +            ) +            num_samples = round(self.size / 2) +        if num_samples == self.size: +            return np.random.permutation(self.size) + +        # adjust the number of random samples if necessary +        sampling_factor = max(1, sampling_factor) +        num_random_samples = min(sampling_factor * num_samples, self.size) +        if num_random_samples == self.size or num_random_samples <= 1: +            return self.get_random_sample_indices(num_random_samples) +        random_samples_indices = self.get_random_sample_indices(num_random_samples) + +        # calculate the desired parameter configuration indices, starting at the edges of the parameter indices and halving each time +        def get_next_sample(lower: tuple, upper: tuple) -> tuple: +            """Get the next sample indices by halving the range between upper and lower bounds.""" +            half = tuple(round((l + u) / 2) for l, u in zip(lower, upper)) +            if half == lower or half == upper: +                # if the range is too small to make a difference, pick one of the bounds and replace one random index with an index of the other +                random_sample = list(choice([lower, upper])) +                random_index = randint(0, self.num_params-1) +                random_sample[random_index] = lower[random_index] if random_sample[random_index] == upper[random_index] else upper[random_index] +                return tuple(random_sample) +            return half + +        # seed the queue with the lower and upper bounds of the parameter indices +        target_samples_param_indices = [] +        target_samples_param_indices.append(tuple(0 for _ in range(self.num_params))) +        target_samples_param_indices.append(tuple(len(self.params_values[i]) - 1 for i in range(self.num_params))) +        queue = deque([(target_samples_param_indices[0], target_samples_param_indices[1])]) + +        # do a binary search for the target sample indices, until we have enough samples +        while 
len(target_samples_param_indices) < num_samples: + lower, upper = queue.popleft() + next_sample = get_next_sample(lower, upper) + target_samples_param_indices.append(next_sample) + queue.append((lower, next_sample)) + queue.append((next_sample, upper)) + + # filter out duplicate samples + target_samples_param_indices = list(set(target_samples_param_indices)) + + # for each of the target sample indices, calculate which parameter configuration is closest + if self.params_values_indices is None: + self.__prepare_neighbors_index() + target_sample_indices = list() + for target_sample_param_config_indices in target_samples_param_indices: + # calculate the absolute difference between the parameter value indices + abs_index_difference = np.abs(self.params_values_indices - target_sample_param_config_indices, dtype=self.params_values_indices.dtype) + # find the param config index where the difference is the smallest + sum_of_index_differences = np.sum(abs_index_difference, axis=1) + param_index = self.get_param_config_index(self.get_param_config_from_param_indices(target_sample_param_config_indices)) + if param_index is not None: + # set the sum of index differences to infinity for the parameter index to avoid returning the same parameter configuration + sum_of_index_differences[param_index] = self.get_list_param_indices_numpy_max() + min_index_difference_index = np.argmin(sum_of_index_differences) + target_sample_indices.append(min_index_difference_index.item()) + + # filter out duplicate samples and replace with random ones + target_sample_indices = list(set(target_sample_indices)) + while len(target_sample_indices) < num_samples: + # if there are not enough unique samples, fill up with random samples + random_sample_indices = self.get_random_sample_indices(num_samples - len(target_sample_indices)) + target_sample_indices.extend(random_sample_indices.tolist()) + target_sample_indices = list(set(target_sample_indices)) + + return target_sample_indices + + def 
get_distributed_random_sample(self, num_samples: int, sampling_factor=10) -> List[tuple]: + """Get a distributed random sample of parameter configurations.""" + return self.get_param_configs_at_indices(self.get_distributed_random_sample_indices(num_samples, sampling_factor)) + + def get_LHS_sample_indices(self, num_samples: int) -> List[int]: + """Get a Latin Hypercube sample of parameter configuration indices.""" + if num_samples > self.size: + warn( + f"Too many samples requested ({num_samples}), reducing the number of samples to half of the searchspace size ({self.size})" + ) + num_samples = round(self.size / 2) + if num_samples == self.size: + return np.shuffle([range(self.size)]) + if self.params_values_indices is None: + self.__prepare_neighbors_index() + + # get the Latin Hypercube of samples + target_samples_param_indices = LatinHypercube(len(self.params_values)).integers( + l_bounds=self.get_param_indices_lower_bounds(), + u_bounds=self.get_param_indices_upper_bounds(), + n=num_samples, + endpoint=True) + target_samples_param_indices = np.array(target_samples_param_indices, dtype=self.params_values_indices.dtype) + + # for each of the target sample indices, calculate which parameter configuration is closest + target_sample_indices = list() + for target_sample_param_config_indices in target_samples_param_indices: + # calculate the absolute difference between the parameter value indices + abs_index_difference = np.abs(self.params_values_indices - target_sample_param_config_indices, dtype=self.params_values_indices.dtype) + # find the param config index where the difference is the smallest + sum_of_index_differences = np.sum(abs_index_difference, axis=1) + param_index = self.get_param_config_index(self.get_param_config_from_param_indices(target_sample_param_config_indices)) + if param_index is not None: + # set the sum of index differences to infinity for the parameter index to avoid returning the same parameter configuration + 
sum_of_index_differences[param_index] = self.get_list_param_indices_numpy_max() + min_index_difference_index = np.argmin(sum_of_index_differences) + target_sample_indices.append(min_index_difference_index.item()) + + # filter out duplicate samples and replace with random ones + target_sample_indices = list(set(target_sample_indices)) + while len(target_sample_indices) < num_samples: + # if there are not enough unique samples, fill up with random samples + random_sample_indices = self.get_random_sample_indices(num_samples - len(target_sample_indices)) + target_sample_indices.extend(random_sample_indices.tolist()) + target_sample_indices = list(set(target_sample_indices)) + + return target_sample_indices + + def get_LHS_sample(self, num_samples: int) -> List[tuple]: + """Get a distributed random sample of parameter configurations.""" + return self.get_param_configs_at_indices(self.get_LHS_sample_indices(num_samples)) + + def get_neighbors_indices_no_cache(self, param_config: tuple, neighbor_method=None, build_full_cache=False) -> List[int]: """Get the neighbors indices for a parameter configuration (does not check running cache, useful when mixing neighbor methods).""" param_config_index = self.get_param_config_index(param_config) - # this is the simplest case, just return the cached value - if self.build_neighbors_index and param_config_index is not None: - if neighbor_method is not None and neighbor_method != self.neighbor_method: - raise ValueError( - f"The neighbor method {neighbor_method} differs from the neighbor method {self.neighbor_method} initially used for indexing" - ) - return self.neighbors_index[param_config_index] - # check if there is a neighbor method to use if neighbor_method is None: if self.neighbor_method is None: raise ValueError("Neither the neighbor_method argument nor self.neighbor_method was set") neighbor_method = self.neighbor_method + # this is the simplest case, just return the cached value + if param_config_index is not None: + if 
neighbor_method in self.neighbors_index: + return self.neighbors_index[neighbor_method][param_config_index] + elif build_full_cache: + # build the neighbors index for the given neighbor method + self.neighbors_index[neighbor_method] = self.__build_neighbors_index(neighbor_method) + return self.neighbors_index[neighbor_method][param_config_index] + if neighbor_method == "Hamming": return self.__get_neighbors_indices_hamming(param_config) @@ -755,9 +1323,11 @@ def get_neighbors_indices_no_cache(self, param_config: tuple, neighbor_method=No return self.__get_neighbors_indices_strictlyadjacent(param_config_index, param_config) if neighbor_method == "adjacent": return self.__get_neighbors_indices_adjacent(param_config_index, param_config) + if neighbor_method == "closest-param-indices": + return self.__get_neighbor_indices_closest_param_indices(param_config, param_config_index) raise ValueError(f"The neighbor method {neighbor_method} is not in {supported_neighbor_methods}") - def get_neighbors_indices(self, param_config: tuple, neighbor_method=None) -> List[int]: + def get_neighbors_indices(self, param_config: tuple, neighbor_method=None, build_full_cache=False) -> List[int]: """Get the neighbors indices for a parameter configuration, cached if requested before.""" if neighbor_method is None: neighbor_method = self.neighbor_method @@ -766,8 +1336,12 @@ def get_neighbors_indices(self, param_config: tuple, neighbor_method=None) -> Li neighbors = self.__neighbor_cache[neighbor_method].get(param_config, None) # if there are no cached neighbors, compute them if neighbors is None: - neighbors = self.get_neighbors_indices_no_cache(param_config, neighbor_method) + neighbors = self.get_neighbors_indices_no_cache(param_config, neighbor_method, build_full_cache) self.__neighbor_cache[neighbor_method][param_config] = neighbors + self.__add_to_neighbor_partial_cache(param_config, neighbors, neighbor_method, full_neighbors=True) + if neighbor_method == "strictly-adjacent": + # any 
neighbor in strictly-adjacent is also an adjacent neighbor + self.__add_to_neighbor_partial_cache(param_config, neighbors, "adjacent", full_neighbors=False) return neighbors def are_neighbors_indices_cached(self, param_config: tuple, neighbor_method=None) -> bool: @@ -782,9 +1356,69 @@ def get_neighbors_no_cache(self, param_config: tuple, neighbor_method=None) -> L """Get the neighbors for a parameter configuration (does not check running cache, useful when mixing neighbor methods).""" return self.get_param_configs_at_indices(self.get_neighbors_indices_no_cache(param_config, neighbor_method)) - def get_neighbors(self, param_config: tuple, neighbor_method=None) -> List[tuple]: + def get_neighbors(self, param_config: tuple, neighbor_method=None, build_full_cache=False) -> List[tuple]: """Get the neighbors for a parameter configuration.""" - return self.get_param_configs_at_indices(self.get_neighbors_indices(param_config, neighbor_method)) + return self.get_param_configs_at_indices(self.get_neighbors_indices(param_config, neighbor_method, build_full_cache)) + + def get_partial_neighbors_indices(self, param_config: tuple, neighbor_method=None) -> List[tuple]: + """Get the partial neighbors for a parameter configuration.""" + if neighbor_method is None: + neighbor_method = self.neighbor_method + if neighbor_method is None: + raise ValueError("Neither the neighbor_method argument nor self.neighbor_method was set") + param_config_index = self.get_param_config_index(param_config) + if param_config_index is None or param_config_index not in self.__neighbor_partial_cache[neighbor_method]: + return [] + return self.get_param_configs_at_indices(self.__neighbor_partial_cache[neighbor_method][param_config_index]) + + def pop_random_partial_neighbor(self, param_config: tuple, neighbor_method=None, threshold=2) -> tuple: + """Pop a random partial neighbor for a given a parameter configuration if there are at least `threshold` neighbors.""" + if neighbor_method is None: + 
neighbor_method = self.neighbor_method + if neighbor_method is None: + raise ValueError("Neither the neighbor_method argument nor self.neighbor_method was set") + param_config_index = self.get_param_config_index(param_config) + if param_config_index is None or param_config_index not in self.__neighbor_partial_cache[neighbor_method]: + return None + partial_neighbors = self.get_param_configs_at_indices(self.__neighbor_partial_cache[neighbor_method][param_config_index]) + if len(partial_neighbors) < threshold: + return None + partial_neighbor_index = choice(range(len(partial_neighbors))) + random_neighbor = self.__neighbor_partial_cache[neighbor_method][param_config_index].pop(partial_neighbor_index) + return self.get_param_configs_at_indices([random_neighbor])[0] + + def get_random_neighbor(self, param_config: tuple, neighbor_method=None, use_partial_cache=True) -> tuple: + """Get an approximately random neighbor for a parameter configuration. Much faster than taking a random choice of all neighbors, but does not build full cache.""" + if self.are_neighbors_indices_cached(param_config, neighbor_method): + neighbors = self.get_neighbors(param_config, neighbor_method) + return choice(neighbors) if len(neighbors) > 0 else None + elif use_partial_cache: + # pop the chosen neighbor from the cache to avoid choosing it again until it is re-added + random_neighbor = self.pop_random_partial_neighbor(param_config, neighbor_method) + if random_neighbor is not None: + return random_neighbor + + # check if there is a neighbor method to use + if neighbor_method is None: + neighbor_method = self.neighbor_method + if neighbor_method is None: + raise ValueError("Neither the neighbor_method argument nor self.neighbor_method was set") + + # oddly enough, the custom random neighbor methods are not faster than just generating all neighbor + partials + # # find the random neighbor based on the method + # if neighbor_method == "adjacent": + # return 
self.__get_random_neighbor_adjacent(param_config) + # elif neighbor_method == "Hamming": + # this implementation is not as efficient as just generating all neighbors + # return self.__get_random_neighbor_hamming(param_config) + # # else: + # # not much performance to be gained for strictly-adjacent neighbors, just generate the neighbors + + # calculate the full neighbors and return a random one + neighbors = self.get_neighbors(param_config, neighbor_method) + if len(neighbors) == 0: + return None + return choice(neighbors) def get_param_neighbors(self, param_config: tuple, index: int, neighbor_method: str, randomize: bool) -> list: """Get the neighboring parameters at an index.""" @@ -837,3 +1471,39 @@ def order_param_configs( f"The number of ordered parameter configurations ({len(ordered_param_configs)}) differs from the original number of parameter configurations ({len(param_configs)})" ) return ordered_param_configs + + def to_ax_searchspace(self): + """Convert this searchspace to an Ax SearchSpace.""" + from ax import ChoiceParameter, FixedParameter, ParameterType, SearchSpace + + # create searchspace + ax_searchspace = SearchSpace([]) + + # add the parameters + for param_name, param_values in self.tune_params.items(): + if len(param_values) == 0: + continue + + # convert the types + assert all( + isinstance(param_values[0], type(v)) for v in param_values + ), f"Parameter values of mixed types are not supported: {param_values}" + param_type_mapping = { + str: ParameterType.STRING, + int: ParameterType.INT, + float: ParameterType.FLOAT, + bool: ParameterType.BOOL, + } + param_type = param_type_mapping[type(param_values[0])] + + # add the parameter + if len(param_values) == 1: + ax_searchspace.add_parameter(FixedParameter(param_name, param_type, param_values[0])) + else: + ax_searchspace.add_parameter(ChoiceParameter(param_name, param_type, param_values)) + + # add the constraints + raise NotImplementedError( + "Conversion to Ax SearchSpace has not been fully 
implemented as Ax Searchspaces can't capture full complexity." + ) + # return ax_searchspace diff --git a/kernel_tuner/strategies/basinhopping.py b/kernel_tuner/strategies/basinhopping.py index 20e800f6e..eed906676 100644 --- a/kernel_tuner/strategies/basinhopping.py +++ b/kernel_tuner/strategies/basinhopping.py @@ -1,7 +1,7 @@ """The strategy that uses the basinhopping global optimization method.""" import scipy.optimize -from kernel_tuner import util +from kernel_tuner.util import StopCriterionReached from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc, setup_method_arguments, setup_method_options @@ -31,7 +31,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): try: opt_result = scipy.optimize.basinhopping(cost_func, x0, T=T, stepsize=eps, minimizer_kwargs=minimizer_kwargs, disp=tuning_options.verbose) - except util.StopCriterionReached as e: + except StopCriterionReached as e: if tuning_options.verbose: print(e) diff --git a/kernel_tuner/strategies/bayes_opt.py b/kernel_tuner/strategies/bayes_opt.py index d0269e02a..f155bcd01 100644 --- a/kernel_tuner/strategies/bayes_opt.py +++ b/kernel_tuner/strategies/bayes_opt.py @@ -237,7 +237,7 @@ def get_hyperparam(name: str, default, supported_values=list()): self.worst_value = np.inf self.argopt = np.argmin elif opt_direction == "max": - self.worst_value = np.NINF + self.worst_value = -np.inf self.argopt = np.argmax else: raise ValueError("Invalid optimization direction '{}'".format(opt_direction)) diff --git a/kernel_tuner/strategies/brute_force.py b/kernel_tuner/strategies/brute_force.py index e1e5fdb60..b7efc002c 100644 --- a/kernel_tuner/strategies/brute_force.py +++ b/kernel_tuner/strategies/brute_force.py @@ -1,4 +1,4 @@ -""" The default strategy that iterates through the whole parameter space """ +"""The default strategy that iterates through the whole parameter space.""" from kernel_tuner.searchspace import 
Searchspace from kernel_tuner.strategies import common diff --git a/kernel_tuner/strategies/common.py b/kernel_tuner/strategies/common.py index ceb771522..9ffe999b7 100644 --- a/kernel_tuner/strategies/common.py +++ b/kernel_tuner/strategies/common.py @@ -1,9 +1,11 @@ +"""Module for functionality that is commonly used throughout the strategies.""" + import logging import sys from time import perf_counter import numpy as np -import numbers +from scipy.spatial import distance from kernel_tuner import util from kernel_tuner.searchspace import Searchspace @@ -31,7 +33,9 @@ def get_strategy_docstring(name, strategy_options): """Generate docstring for a 'tune' method of a strategy.""" - return _docstring_template.replace("$NAME$", name).replace("$STRAT_OPT$", make_strategy_options_doc(strategy_options)) + return _docstring_template.replace("$NAME$", name).replace( + "$STRAT_OPT$", make_strategy_options_doc(strategy_options) + ) def make_strategy_options_doc(strategy_options): @@ -57,26 +61,57 @@ def get_options(strategy_options, options, unsupported=None): class CostFunc: - def __init__(self, searchspace: Searchspace, tuning_options, runner, *, scaling=False, snap=True): - self.runner = runner - self.snap = snap - self.scaling = scaling + """Class encapsulating the CostFunc method.""" + + def __init__( + self, + searchspace: Searchspace, + tuning_options, + runner, + *, + scaling=False, + snap=True, + return_invalid=False, + return_raw=None, + ): + """An abstract method to handle evaluation of configurations. + + Args: + searchspace: the Searchspace to evaluate on. + tuning_options: various tuning options. + runner: the runner to use. + scaling: whether to internally scale parameter values. Defaults to False. + snap: whether to snap given configurations to their closests equivalent in the space. Defaults to True. + return_invalid: whether to return the util.ErrorConfig of an invalid configuration. Defaults to False. + return_raw: returns (result, results[raw]). 
Key inferred from objective if set to True. Defaults to None. + """ self.searchspace = searchspace self.tuning_options = tuning_options if isinstance(self.tuning_options, dict): - self.tuning_options['max_fevals'] = min(tuning_options['max_fevals'] if 'max_fevals' in tuning_options else np.inf, searchspace.size) + self.tuning_options["max_fevals"] = min( + tuning_options["max_fevals"] if "max_fevals" in tuning_options else np.inf, searchspace.size + ) + self.runner = runner + self.scaling = scaling + self.snap = snap + self.return_invalid = return_invalid + self.return_raw = return_raw + if return_raw is True: + self.return_raw = f"{tuning_options['objective']}s" self.results = [] + self.budget_spent_fraction = 0.0 + def __call__(self, x, check_restrictions=True): """Cost function used by almost all strategies.""" self.runner.last_strategy_time = 1000 * (perf_counter() - self.runner.last_strategy_start_time) # error value to return for numeric optimizers that need a numerical value - logging.debug('_cost_func called') - logging.debug('x: ' + str(x)) + logging.debug("_cost_func called") + logging.debug("x: %s", str(x)) # check if max_fevals is reached or time limit is exceeded - util.check_stop_criterion(self.tuning_options) + self.budget_spent_fraction = util.check_stop_criterion(self.tuning_options) # snap values in x to nearest actual value for each parameter, unscale x if needed if self.snap: @@ -86,7 +121,7 @@ def __call__(self, x, check_restrictions=True): params = snap_to_nearest_config(x, self.searchspace.tune_params) else: params = x - logging.debug('params ' + str(params)) + logging.debug("params %s", str(params)) legal = True result = {} @@ -95,10 +130,21 @@ def __call__(self, x, check_restrictions=True): # else check if this is a legal (non-restricted) configuration if check_restrictions and self.searchspace.restrictions: legal = self.searchspace.is_param_config_valid(tuple(params)) + + if not legal: - params_dict = 
dict(zip(self.searchspace.tune_params.keys(), params)) - result = params_dict - result[self.tuning_options.objective] = util.InvalidConfig() + if "constraint_aware" in self.tuning_options.strategy_options and self.tuning_options.strategy_options["constraint_aware"]: + # attempt to repair + new_params = unscale_and_snap_to_nearest_valid(x, params, self.searchspace, self.tuning_options.eps) + if new_params: + params = new_params + legal = True + x_int = ",".join([str(i) for i in params]) + + if not legal: + params_dict = dict(zip(self.searchspace.tune_params.keys(), params)) + result = params_dict + result[self.tuning_options.objective] = util.InvalidConfig() if legal: # compile and benchmark this instance @@ -116,14 +162,20 @@ def __call__(self, x, check_restrictions=True): # get numerical return value, taking optimization direction into account return_value = result[self.tuning_options.objective] - - if isinstance(return_value, numbers.Number): - if self.tuning_options.objective_higher_is_better: - # flip the sign if higher means better - return_value = -return_value + if not isinstance(return_value, util.ErrorConfig): + # this is a valid configuration, so invert value in case of maximization + return_value = -return_value if self.tuning_options.objective_higher_is_better else return_value else: - # this is not a valid configuration, just return max - return_value = sys.float_info.max + # this is not a valid configuration, replace with float max if needed + if not self.return_invalid: + return_value = sys.float_info.max + + # include raw data in return if requested + if self.return_raw is not None: + try: + return return_value, result[self.return_raw] + except KeyError: + return return_value, [np.nan] return return_value @@ -138,6 +190,7 @@ def get_bounds_x0_eps(self): if "x0" in self.tuning_options.strategy_options: x0 = self.tuning_options.strategy_options.x0 + assert isinstance(x0, (tuple, list)) and len(x0) == len(values), f"Invalid x0: {x0}, expected number of 
parameters of `tune_params` to match ({len(values)})" else: x0 = None @@ -160,19 +213,22 @@ def get_bounds_x0_eps(self): eps = 1 self.tuning_options["eps"] = eps - logging.debug('get_bounds_x0_eps called') - logging.debug('bounds ' + str(bounds)) - logging.debug('x0 ' + str(x0)) - logging.debug('eps ' + str(eps)) + logging.debug("get_bounds_x0_eps called") + logging.debug("bounds %s", str(bounds)) + logging.debug("x0 %s", str(x0)) + logging.debug("eps %s", str(eps)) return bounds, x0, eps def get_bounds(self): """Create a bounds array from the tunable parameters.""" bounds = [] - for values in self.searchspace.tune_params.values(): - sorted_values = np.sort(values) - bounds.append((sorted_values[0], sorted_values[-1])) + for values in self.searchspace.params_values: + try: + bounds.append((min(values), max(values))) + except TypeError: + # if values are not numbers, use the first and last value as bounds + bounds.append((values[0], values[-1])) return bounds @@ -181,7 +237,7 @@ def setup_method_arguments(method, bounds): kwargs = {} # pass bounds to methods that support it if method in ["L-BFGS-B", "TNC", "SLSQP"]: - kwargs['bounds'] = bounds + kwargs["bounds"] = bounds return kwargs @@ -194,21 +250,21 @@ def setup_method_options(method, tuning_options): maxiter = tuning_options.strategy_options.maxiter else: maxiter = 100 - kwargs['maxiter'] = maxiter + kwargs["maxiter"] = maxiter if method in ["Nelder-Mead", "Powell"]: - kwargs['maxfev'] = maxiter + kwargs["maxfev"] = maxiter elif method == "L-BFGS-B": - kwargs['maxfun'] = maxiter + kwargs["maxfun"] = maxiter # pass eps to methods that support it if method in ["CG", "BFGS", "L-BFGS-B", "TNC", "SLSQP"]: - kwargs['eps'] = tuning_options.eps + kwargs["eps"] = tuning_options.eps elif method == "COBYLA": - kwargs['rhobeg'] = tuning_options.eps + kwargs["rhobeg"] = tuning_options.eps # not all methods support 'disp' option - if method not in ['TNC']: - kwargs['disp'] = tuning_options.verbose + if method not in ["TNC"]: 
+ kwargs["disp"] = tuning_options.verbose return kwargs @@ -255,5 +311,29 @@ def scale_from_params(params, tune_params, eps): """Helper func to do the inverse of the 'unscale' function.""" x = np.zeros(len(params)) for i, v in enumerate(tune_params.values()): - x[i] = 0.5 * eps + v.index(params[i])*eps + x[i] = 0.5 * eps + v.index(params[i]) * eps return x + + + +def unscale_and_snap_to_nearest_valid(x, params, searchspace, eps): + """Helper func to snap to the nearest valid configuration""" + # params is nearest unscaled point, but is not valid + neighbors = get_neighbors(params, searchspace) + + if neighbors: + # sort on distance to x + neighbors.sort(key=lambda y: distance.euclidean(x,scale_from_params(y, searchspace.tune_params, eps))) + + # return closest valid neighbor + return neighbors[0] + + return [] + + +def get_neighbors(params, searchspace): + for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]: + neighbors = searchspace.get_neighbors(tuple(params), neighbor_method=neighbor_method) + if len(neighbors) > 0: + return neighbors + return [] diff --git a/kernel_tuner/strategies/diff_evo.py b/kernel_tuner/strategies/diff_evo.py index a2c9d9d00..888672d76 100644 --- a/kernel_tuner/strategies/diff_evo.py +++ b/kernel_tuner/strategies/diff_evo.py @@ -1,42 +1,409 @@ -"""The differential evolution strategy that optimizes the search through the parameter space.""" -from scipy.optimize import differential_evolution +"""A simple Different Evolution for parameter search.""" +import random +import re +import numpy as np -from kernel_tuner import util +from kernel_tuner.util import StopCriterionReached +from scipy.stats.qmc import LatinHypercube from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc -supported_methods = ["best1bin", "best1exp", "rand1exp", "randtobest1exp", "best2exp", "rand2exp", "randtobest1bin", "best2bin", "rand2bin", "rand1bin"] +_options = 
dict( + popsize=("population size", 50), + popsize_times_dimensions=("multiply population size with number of dimensions (True/False)", False), + maxiter=("maximum number of generations", int(1e15)), # very large to avoid early stopping (stopping is managed by StopCriterionReached) + F=("mutation factor (differential weight)", 1.3), + CR=("crossover rate", 0.9), + method=("method", "best1bin"), + constraint_aware=("constraint-aware optimization (True/False)", True), +) -_options = dict(method=(f"Creation method for new population, any of {supported_methods}", "best1bin"), - popsize=("Population size", 20), - maxiter=("Number of generations", 100)) +supported_methods = [ + "best1bin", + "rand1bin", + "best2bin", + "rand2bin", + "best1exp", + "rand1exp", + "best2exp", + "rand2exp", + "currenttobest1bin", + "currenttobest1exp", + "randtobest1bin", + "randtobest1exp", +] def tune(searchspace: Searchspace, runner, tuning_options): - - method, popsize, maxiter = common.get_options(tuning_options.strategy_options, _options) - - # build a bounds array as needed for the optimizer cost_func = CostFunc(searchspace, tuning_options, runner) - bounds, x0, _ = cost_func.get_bounds_x0_eps() + bounds = cost_func.get_bounds() + + options = tuning_options.strategy_options + popsize, popsize_times_dimensions, maxiter, F, CR, method, constraint_aware = common.get_options(options, _options) + if popsize_times_dimensions: + popsize *= min(len(searchspace.get_true_tunable_params()), searchspace.size) + maxiter = min(maxiter, searchspace.size) - # ensure particles start from legal points - population = list(list(p) for p in searchspace.get_random_sample(popsize)) + if method not in supported_methods: + raise ValueError(f"Error {method} not supported, {supported_methods=}") - # call the differential evolution optimizer - opt_result = None try: - opt_result = differential_evolution(cost_func, bounds, maxiter=maxiter, popsize=popsize, init=population, - polish=False, strategy=method, 
disp=tuning_options.verbose, x0=x0) - except util.StopCriterionReached as e: + differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F, CR, method, constraint_aware, tuning_options.verbose) + except StopCriterionReached as e: if tuning_options.verbose: print(e) - if opt_result and tuning_options.verbose: - print(opt_result.message) - return cost_func.results tune.__doc__ = common.get_strategy_docstring("Differential Evolution", _options) + + +def values_to_indices(individual_values, tune_params): + """Converts an individual's values to its corresponding index vector.""" + idx = np.zeros(len(individual_values)) + for i, v in enumerate(tune_params.values()): + idx[i] = v.index(individual_values[i]) + return idx + + +def indices_to_values(individual_indices, tune_params): + """Converts an individual's index vector back to its values.""" + tune_params_list = list(tune_params.values()) + values = [] + for dim, idx in enumerate(individual_indices): + values.append(tune_params_list[dim][idx]) + return values + + +def parse_method(method): + """Helper func to parse the preferred method into its components.""" + pattern = r"^(best|rand|currenttobest|randtobest)(1|2)(bin|exp)$" + match = re.fullmatch(pattern, method) + + if match: + if match.group(1) in ["currenttobest", "randtobest"]: + mutation_method = mutation[match.group(1)] + else: + mutation_method = mutation[match.group(2)] + return match.group(1) == "best", int(match.group(2)), mutation_method, crossover[match.group(3)] + else: + raise ValueError("Error parsing differential evolution method") + + +def random_draw(idxs, mutate, best): + """ + Draw requested number of random individuals. + + Draw without replacement unless there is not enough to draw from. 
+ """ + draw = 2 * mutate + 1 - int(best) + return np.random.choice(idxs, draw, replace=draw >= len(idxs)) + + +def generate_population(tune_params, max_idx, popsize, searchspace, constraint_aware): + """Generate new population, returns Numpy array.""" + if constraint_aware: + population = [list(c) for c in searchspace.get_LHS_sample(popsize)] + else: + population = [] + for _ in range(popsize): + ind = [] + for key in tune_params: + ind.append(random.choice(tune_params[key])) + population.append(ind) + return population + + +def differential_evolution(searchspace, cost_func, bounds, popsize, maxiter, F, CR, method, constraint_aware, verbose): + """ + A basic implementation of the Differential Evolution algorithm. + + This function finds the minimum of a given cost function within specified bounds. + """ + tune_params = cost_func.tuning_options.tune_params + min_idx = np.zeros(len(tune_params)) + max_idx = [len(v) - 1 for v in tune_params.values()] + + best, mutation, mutation_method, crossover_method = parse_method(method) + + # --- 1. Initialization --- + + # Convert bounds to a numpy array for easier manipulation + bounds = np.array(bounds) + + # Initialize the population with random individuals within the bounds + population = generate_population(tune_params, max_idx, popsize, searchspace, constraint_aware) + + # Override with user-specified starting position + population[0] = cost_func.get_start_pos() + + # Calculate the initial cost for each individual in the population + population_cost = np.array([cost_func(ind) for ind in population]) + + # Keep track of the best solution found so far + best_idx = np.argmin(population_cost) + best_solution = population[best_idx] + best_solution_idx = values_to_indices(best_solution, tune_params) + best_cost = population_cost[best_idx] + + # --- 2. 
Main Loop --- + + stabilized = 0 + + # Iterate through the specified number of generations + for generation in range(maxiter): + + # Trial population and vectors are stored as lists + # not Numpy arrays, to make it easy to check for duplicates + trial_population = [] + + # If for two generations there has been no change, generate a new population + if stabilized > 2: + trial_population = list(generate_population(tune_params, max_idx, popsize, searchspace, constraint_aware)) + + # Iterate over each individual in the population + i = 0 + stuck = 0 + while len(trial_population) < popsize: + + # --- a. Mutation --- + # Select three distinct random individuals (a, b, c) from the population, + # ensuring they are different from the current individual 'i'. + idxs = [idx for idx in range(popsize) if idx != i] + randos = random_draw(idxs, mutation, best) + + if mutation_method == mutate_currenttobest1: + randos[0] = i + + randos_idx = [values_to_indices(population[rando], tune_params) for rando in randos] + + # Apply mutation strategy + donor_vector_idx = mutation_method(best_solution_idx, randos_idx, F, min_idx, max_idx, best) + donor_vector = indices_to_values(donor_vector_idx, tune_params) + + # --- b. Crossover --- + trial_vector = crossover_method(donor_vector, population[i], CR) + + # Repair if constraint_aware + if constraint_aware: + trial_vector = repair(trial_vector, searchspace) + + # Store for selection, if not in trial_population already + if list(trial_vector) not in trial_population: + trial_population.append(list(trial_vector)) + i += 1 + stuck = 0 + else: + stuck += 1 + + if stuck >= 20: + if verbose: + print("Differential Evolution got stuck generating new individuals, insert random sample") + trial_population.append(list(searchspace.get_random_sample(1)[0])) + i += 1 + stuck = 0 + + + # --- c. 
Selection --- + + # Calculate the cost of the new trial vectors + trial_population_cost = np.array([cost_func(ind) for ind in trial_population]) + + # Keep track of whether population changes over time + no_change = True + + # Iterate over each individual in the trial population + for i in range(popsize): + + trial_vector = trial_population[i] + trial_cost = trial_population_cost[i] + + # If the trial vector has a lower or equal cost, it replaces the + # target vector in the population for the next generation. + if trial_cost <= population_cost[i]: + + # check if trial_vector is not already in population + if population.count(trial_vector) == 0: + population[i] = trial_vector + population_cost[i] = trial_cost + no_change = False + + # Update the overall best solution if the new one is better + if trial_cost < best_cost: + best_cost = trial_cost + best_solution = trial_vector + best_solution_idx = values_to_indices(best_solution, tune_params) + + # Note if population is stabilizing + if no_change: + stabilized += 1 + + # Print the progress at the end of the generation + if verbose: + print(f"Generation {generation + 1}, Best Cost: {best_cost:.6f}") + + if verbose: + print(f"Differential Evolution completed fevals={len(cost_func.tuning_options.unique_results)}") + + return {"solution": best_solution, "cost": best_cost} + + +def round_and_clip(mutant_idx_float, min_idx, max_idx): + """Helper func to round floating index to nearest integer and clip within bounds.""" + # Round to the nearest integer + rounded_idx = np.round(mutant_idx_float) + + # Clip the indices to ensure they are within valid index bounds + clipped_idx = np.clip(rounded_idx, min_idx, max_idx) + + # Convert final mutant vector to integer type + return clipped_idx.astype(int) + + +def mutate_currenttobest1(best_idx, randos_idx, F, min_idx, max_idx, best): + """ + Performs the DE/1 currenttobest1 mutation strategy. + + This function operates on the indices of the parameters, not their actual values. 
+ The formula v = cur + F * (best - cur + a - b) is applied to the indices, and the result is + then rounded and clipped to ensure it remains a valid index. + """ + cur_idx, b_idx, c_idx = randos_idx + + # Apply the DE/currenttobest/1 formula to the indices + mutant_idx_float = cur_idx + F * (best_idx - cur_idx + b_idx - c_idx) + + return round_and_clip(mutant_idx_float, min_idx, max_idx) + + +def mutate_randtobest1(best_idx, randos_idx, F, min_idx, max_idx, best): + """ + Performs the DE/1 randtobest1 mutation strategy. + + This function operates on the indices of the parameters, not their actual values. + The formula v = a + F * (best - a + b - c) is applied to the indices, and the result is + then rounded and clipped to ensure it remains a valid index. + """ + a_idx, b_idx, c_idx = randos_idx + + # Apply the DE/currenttobest/1 formula to the indices + mutant_idx_float = a_idx + F * (best_idx - a_idx + b_idx - c_idx) + + return round_and_clip(mutant_idx_float, min_idx, max_idx) + + +def mutate_de_1(best_idx, randos_idx, F, min_idx, max_idx, best): + """ + Performs the DE/1 mutation strategy. + + This function operates on the indices of the parameters, not their actual values. + The formula v = a + F * (b - c) is applied to the indices, and the result is + then rounded and clipped to ensure it remains a valid index. + + """ + if best: + a_idx = best_idx + b_idx, c_idx = randos_idx + else: + a_idx, b_idx, c_idx = randos_idx + + # Apply the DE/rand/1 formula to the indices + mutant_idx_float = a_idx + F * (b_idx - c_idx) + + return round_and_clip(mutant_idx_float, min_idx, max_idx) + + +def mutate_de_2(best_idx, randos_idx, F, min_idx, max_idx, best): + """ + Performs the DE/2 mutation strategy for a discrete search space. + + This function operates on the indices of the parameters, not their actual values. + The formula v = a + F1 * (b - c) + F2 * (d - e) is applied to the indices, + and the result is then rounded and clipped to ensure it remains a valid index. 
+ + """ + if best: + a_idx = best_idx + b_idx, c_idx, d_idx, e_idx = randos_idx + else: + a_idx, b_idx, c_idx, d_idx, e_idx = randos_idx + + # Apply the DE/2 formula to the indices + mutant_idx_float = a_idx + F * (b_idx + c_idx - d_idx - e_idx) + + return round_and_clip(mutant_idx_float, min_idx, max_idx) + + +def binomial_crossover(donor_vector, target, CR): + """Performs binomial crossover of donor_vector with target given crossover rate CR.""" + # Create the trial vector by mixing parameters from the target and donor vectors + trial_vector = target.copy() + dimensions = len(donor_vector) + + # Generate a random array of floats for comparison with the crossover rate CR + crossover_points = np.random.rand(dimensions) < CR + + # Ensure at least one parameter is taken from the donor vector + # to prevent the trial vector from being identical to the target vector. + if not np.any(crossover_points): + crossover_points[np.random.randint(0, dimensions)] = True + + # Apply crossover + for i, d in enumerate(donor_vector): + if crossover_points[i]: + trial_vector[i] = donor_vector[i] + + return trial_vector + + +def exponential_crossover(donor_vector, target, CR): + """ + Performs exponential crossover for a discrete search space. + + This creates a trial vector by taking a contiguous block of parameters + from the donor vector and the rest from the target vector. + """ + dimensions = len(target) + trial_vector = target.copy() + + # 1. Select a random starting point for the crossover block. + start_point = np.random.randint(0, dimensions) + + # 2. Determine the length of the block to be copied from the mutant. + # The loop continues as long as random numbers are less than CR. + # This ensures at least one parameter is always taken from the mutant. 
+ l = 0 + while np.random.rand() < CR and l < dimensions: + crossover_point = (start_point + l) % dimensions + trial_vector[crossover_point] = donor_vector[crossover_point] + l += 1 + + return trial_vector + + +def repair(trial_vector, searchspace): + """ + Attempts to repair trial_vector if trial_vector is invalid + """ + if not searchspace.is_param_config_valid(tuple(trial_vector)): + # search for valid configurations neighboring trial_vector + for neighbor_method in ["closest-param-indices"]: + # start from strictly-adjacent to increasingly allowing more neighbors + # for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]: + new_trial_vector = searchspace.get_random_neighbor(tuple(trial_vector), neighbor_method=neighbor_method) + if new_trial_vector is not None: + # print(f"Differential evolution resulted in invalid config {trial_vector=}, repaired to {new_trial_vector=}") + return list(new_trial_vector) + + return trial_vector + + +mutation = { + "1": mutate_de_1, + "2": mutate_de_2, + "currenttobest": mutate_currenttobest1, + "randtobest": mutate_randtobest1, +} +crossover = {"bin": binomial_crossover, "exp": exponential_crossover} diff --git a/kernel_tuner/strategies/dual_annealing.py b/kernel_tuner/strategies/dual_annealing.py index 0f44bd849..598151ea5 100644 --- a/kernel_tuner/strategies/dual_annealing.py +++ b/kernel_tuner/strategies/dual_annealing.py @@ -1,18 +1,19 @@ """The strategy that uses the dual annealing optimization method.""" import scipy.optimize -from kernel_tuner import util +from kernel_tuner.util import StopCriterionReached from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc, setup_method_arguments, setup_method_options supported_methods = ['COBYLA', 'L-BFGS-B', 'SLSQP', 'CG', 'Powell', 'Nelder-Mead', 'BFGS', 'trust-constr'] -_options = dict(method=(f"Local optimization method to use, choose any from {supported_methods}", "Powell")) 
+_options = dict(method=(f"Local optimization method to use, choose any from {supported_methods}", "COBYLA")) def tune(searchspace: Searchspace, runner, tuning_options): - method = common.get_options(tuning_options.strategy_options, _options)[0] + _options["max_fevals"] = ("", searchspace.size) + method, max_fevals = common.get_options(tuning_options.strategy_options, _options) #scale variables in x to make 'eps' relevant for multiple variables cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True) @@ -29,8 +30,8 @@ def tune(searchspace: Searchspace, runner, tuning_options): opt_result = None try: - opt_result = scipy.optimize.dual_annealing(cost_func, bounds, minimizer_kwargs=minimizer_kwargs, x0=x0) - except util.StopCriterionReached as e: + opt_result = scipy.optimize.dual_annealing(cost_func, bounds, minimizer_kwargs=minimizer_kwargs, x0=x0, maxfun=max_fevals) + except StopCriterionReached as e: if tuning_options.verbose: print(e) diff --git a/kernel_tuner/strategies/firefly_algorithm.py b/kernel_tuner/strategies/firefly_algorithm.py index 821b55ef6..a732d4041 100644 --- a/kernel_tuner/strategies/firefly_algorithm.py +++ b/kernel_tuner/strategies/firefly_algorithm.py @@ -3,7 +3,7 @@ import numpy as np -from kernel_tuner import util +from kernel_tuner.util import StopCriterionReached from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc, scale_from_params @@ -13,7 +13,8 @@ maxiter=("Maximum number of iterations", 100), B0=("Maximum attractiveness", 1.0), gamma=("Light absorption coefficient", 1.0), - alpha=("Randomization parameter", 0.2)) + alpha=("Randomization parameter", 0.2), + constraint_aware=("constraint-aware optimization (True/False)", True)) def tune(searchspace: Searchspace, runner, tuning_options): @@ -23,7 +24,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): # using this instead of get_bounds because scaling is used bounds, x0, 
eps = cost_func.get_bounds_x0_eps() - num_particles, maxiter, B0, gamma, alpha = common.get_options(tuning_options.strategy_options, _options) + num_particles, maxiter, B0, gamma, alpha, constraint_aware = common.get_options(tuning_options.strategy_options, _options) best_score_global = sys.float_info.max best_position_global = [] @@ -34,9 +35,10 @@ def tune(searchspace: Searchspace, runner, tuning_options): swarm.append(Firefly(bounds)) # ensure particles start from legal points - population = list(list(p) for p in searchspace.get_random_sample(num_particles)) - for i, particle in enumerate(swarm): - particle.position = scale_from_params(population[i], searchspace.tune_params, eps) + if constraint_aware: + population = list(list(p) for p in searchspace.get_random_sample(num_particles)) + for i, particle in enumerate(swarm): + particle.position = scale_from_params(population[i], searchspace.tune_params, eps) # include user provided starting point swarm[0].position = x0 @@ -45,7 +47,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): for j in range(num_particles): try: swarm[j].compute_intensity(cost_func) - except util.StopCriterionReached as e: + except StopCriterionReached as e: if tuning_options.verbose: print(e) return cost_func.results @@ -68,7 +70,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): swarm[i].move_towards(swarm[j], beta, alpha) try: swarm[i].compute_intensity(cost_func) - except util.StopCriterionReached as e: + except StopCriterionReached as e: if tuning_options.verbose: print(e) return cost_func.results diff --git a/kernel_tuner/strategies/genetic_algorithm.py b/kernel_tuner/strategies/genetic_algorithm.py index 9ab5d5ad6..2da9d356f 100644 --- a/kernel_tuner/strategies/genetic_algorithm.py +++ b/kernel_tuner/strategies/genetic_algorithm.py @@ -1,42 +1,54 @@ """A simple genetic algorithm for parameter search.""" + import random import numpy as np -from kernel_tuner import util +from kernel_tuner.util import 
StopCriterionReached, get_best_config from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc _options = dict( popsize=("population size", 20), - maxiter=("maximum number of generations", 100), - method=("crossover method to use, choose any from single_point, two_point, uniform, disruptive_uniform", "uniform"), - mutation_chance=("chance to mutate is 1 in mutation_chance", 10), + maxiter=("maximum number of generations", 150), + method=("crossover method to use, choose any from single_point, two_point, uniform, disruptive_uniform", "single_point"), + mutation_chance=("chance to mutate is 1 in mutation_chance", 5), + constraint_aware=("constraint-aware optimization (True/False)", True), ) def tune(searchspace: Searchspace, runner, tuning_options): options = tuning_options.strategy_options - pop_size, generations, method, mutation_chance = common.get_options(options, _options) - crossover = supported_methods[method] + pop_size, generations, method, mutation_chance, constraint_aware = common.get_options(options, _options) + + # if necessary adjust the popsize to a sensible value based on search space size + if pop_size < 2 or pop_size > np.floor(searchspace.size / 2): + pop_size = min(max(round((searchspace.size / generations) * 3), 2), pop_size) + + GA = GeneticAlgorithm(pop_size, searchspace, method, mutation_chance, constraint_aware) best_score = 1e20 cost_func = CostFunc(searchspace, tuning_options, runner) + num_evaluated = 0 - population = list(list(p) for p in searchspace.get_random_sample(pop_size)) + population = GA.generate_population() population[0] = cost_func.get_start_pos() for generation in range(generations): + if constraint_aware and any([not searchspace.is_param_config_valid(tuple(dna)) for dna in population]): + raise ValueError(f"Generation {generation}/{generations}, population validity: {[searchspace.is_param_config_valid(tuple(dna)) for dna in population]}") 
# determine fitness of population members weighted_population = [] for dna in population: try: - time = cost_func(dna, check_restrictions=False) - except util.StopCriterionReached as e: + # if we are not constraint-aware we should check restrictions upon evaluation + time = cost_func(dna, check_restrictions=not constraint_aware) + num_evaluated += 1 + except StopCriterionReached as e: if tuning_options.verbose: print(e) return cost_func.results @@ -48,23 +60,26 @@ def tune(searchspace: Searchspace, runner, tuning_options): # 'best_score' is used only for printing if tuning_options.verbose and cost_func.results: - best_score = util.get_best_config(cost_func.results, tuning_options.objective, tuning_options.objective_higher_is_better)[tuning_options.objective] + best_score = get_best_config( + cost_func.results, tuning_options.objective, tuning_options.objective_higher_is_better + )[tuning_options.objective] if tuning_options.verbose: print("Generation %d, best_score %f" % (generation, best_score)) + # build new population for next generation population = [] # crossover and mutate - while len(population) < pop_size: - dna1, dna2 = weighted_choice(weighted_population, 2) + while len(population) < pop_size and searchspace.size > num_evaluated + len(population): + dna1, dna2 = GA.weighted_choice(weighted_population, 2) - children = crossover(dna1, dna2) + children = GA.crossover(dna1, dna2) for child in children: - child = mutate(child, mutation_chance, searchspace) + child = GA.mutate(child) - if child not in population and searchspace.is_param_config_valid(tuple(child)): + if child not in population and (not constraint_aware or searchspace.is_param_config_valid(tuple(child))): population.append(child) if len(population) >= pop_size: @@ -77,57 +92,112 @@ def tune(searchspace: Searchspace, runner, tuning_options): tune.__doc__ = common.get_strategy_docstring("Genetic Algorithm", _options) +class GeneticAlgorithm: + + def __init__(self, pop_size, searchspace, 
method="uniform", mutation_chance=10, constraint_aware=True): + self.pop_size = pop_size + self.searchspace = searchspace + self.tune_params = searchspace.tune_params.copy() + self.crossover_method = supported_methods[method] + self.mutation_chance = mutation_chance + self.constraint_aware = constraint_aware -def weighted_choice(population, n): - """Randomly select n unique individuals from a weighted population, fitness determines probability of being selected.""" - - def random_index_betavariate(pop_size): - # has a higher probability of returning index of item at the head of the list - alpha = 1 - beta = 2.5 - return int(random.betavariate(alpha, beta) * pop_size) - - def random_index_weighted(pop_size): - """Use weights to increase probability of selection.""" - weights = [w for _, w in population] - # invert because lower is better - inverted_weights = [1.0 / w for w in weights] - prefix_sum = np.cumsum(inverted_weights) - total_weight = sum(inverted_weights) - randf = random.random() * total_weight - # return first index of prefix_sum larger than random number - return next(i for i, v in enumerate(prefix_sum) if v > randf) - - random_index = random_index_betavariate - - indices = [random_index(len(population)) for _ in range(n)] - chosen = [] - for ind in indices: - while ind in chosen: - ind = random_index(len(population)) - chosen.append(ind) - - return [population[ind][0] for ind in chosen] - - -def mutate(dna, mutation_chance, searchspace: Searchspace, cache=True): - """Mutate DNA with 1/mutation_chance chance.""" - # this is actually a neighbors problem with Hamming distance, choose randomly from returned searchspace list - if int(random.random() * mutation_chance) == 0: - if cache: - neighbors = searchspace.get_neighbors(tuple(dna), neighbor_method="Hamming") + def generate_population(self): + """ Constraint-aware population creation method """ + if self.constraint_aware: + pop = list(list(p) for p in self.searchspace.get_random_sample(self.pop_size)) 
else: - neighbors = searchspace.get_neighbors_no_cache(tuple(dna), neighbor_method="Hamming") - if len(neighbors) > 0: - return list(random.choice(neighbors)) - return dna + pop = [] + dna_size = len(self.tune_params) + for _ in range(self.pop_size): + dna = [] + for key in self.tune_params: + dna.append(random.choice(self.tune_params[key])) + pop.append(dna) + return pop + + def crossover(self, dna1, dna2): + """ Apply selected crossover method, repair dna if constraint-aware """ + dna1, dna2 = self.crossover_method(dna1, dna2) + if self.constraint_aware: + return self.repair(dna1), self.repair(dna2) + return dna1, dna2 + + def weighted_choice(self, population, n): + """Randomly select n unique individuals from a weighted population, fitness determines probability of being selected.""" + + def random_index_betavariate(pop_size): + # has a higher probability of returning index of item at the head of the list + alpha = 1 + beta = 2.5 + return int(random.betavariate(alpha, beta) * pop_size) + + def random_index_weighted(pop_size): + """Use weights to increase probability of selection.""" + weights = [w for _, w in population] + # invert because lower is better + inverted_weights = [1.0 / w for w in weights] + prefix_sum = np.cumsum(inverted_weights) + total_weight = sum(inverted_weights) + randf = random.random() * total_weight + # return first index of prefix_sum larger than random number + return next(i for i, v in enumerate(prefix_sum) if v > randf) + + random_index = random_index_betavariate + + indices = [random_index(len(population)) for _ in range(n)] + chosen = [] + for ind in indices: + while ind in chosen: + ind = random_index(len(population)) + chosen.append(ind) + + return [population[ind][0] for ind in chosen] + + + def mutate(self, dna): + """Mutate DNA with 1/mutation_chance chance.""" + # this is actually a neighbors problem with Hamming distance, choose randomly from returned searchspace list + if int(random.random() * self.mutation_chance) == 0: + 
if self.constraint_aware: + neighbor = self.searchspace.get_random_neighbor(tuple(dna), neighbor_method="Hamming") + if neighbor is not None: + return list(neighbor) + else: + # select a tunable parameter at random + mutate_index = random.randint(0, len(self.tune_params)-1) + mutate_key = list(self.tune_params.keys())[mutate_index] + # get all possible values for this parameter and remove current value + new_val_options = self.tune_params[mutate_key].copy() + new_val_options.remove(dna[mutate_index]) + # pick new value at random + if len(new_val_options) > 0: + new_val = random.choice(new_val_options) + dna[mutate_index] = new_val + return dna + + + def repair(self, dna): + """ It is possible that crossover methods yield a configuration that is not valid. """ + if not self.searchspace.is_param_config_valid(tuple(dna)): + # dna is not valid, try to repair it + # search for valid configurations neighboring this config + # start from strictly-adjacent to increasingly allowing more neighbors + for neighbor_method in ["strictly-adjacent", "adjacent", "Hamming"]: + neighbor = self.searchspace.get_random_neighbor(tuple(dna), neighbor_method=neighbor_method) + # if we have found valid neighboring configurations, select one at random + if neighbor is not None: + # print(f"GA crossover resulted in invalid config {dna=}, repaired dna to {neighbor=}") + return list(neighbor) + + return dna def single_point_crossover(dna1, dna2): """Crossover dna1 and dna2 at a random index.""" # check if you can do the crossovers using the neighbor index: check which valid parameter configuration is closest to the crossover, probably best to use "adjacent" as it is least strict? 
pos = int(random.random() * (len(dna1))) - return (dna1[:pos] + dna2[pos:], dna2[:pos] + dna1[pos:]) + return dna1[:pos] + dna2[pos:], dna2[:pos] + dna1[pos:] def two_point_crossover(dna1, dna2): @@ -139,7 +209,7 @@ def two_point_crossover(dna1, dna2): pos1, pos2 = sorted(random.sample(list(range(start, end)), 2)) child1 = dna1[:pos1] + dna2[pos1:pos2] + dna1[pos2:] child2 = dna2[:pos1] + dna1[pos1:pos2] + dna2[pos2:] - return (child1, child2) + return child1, child2 def uniform_crossover(dna1, dna2): @@ -170,7 +240,7 @@ def disruptive_uniform_crossover(dna1, dna2): child1[ind] = dna2[ind] child2[ind] = dna1[ind] swaps += 1 - return (child1, child2) + return child1, child2 supported_methods = { @@ -179,3 +249,4 @@ def disruptive_uniform_crossover(dna1, dna2): "uniform": uniform_crossover, "disruptive_uniform": disruptive_uniform_crossover, } + diff --git a/kernel_tuner/strategies/greedy_ils.py b/kernel_tuner/strategies/greedy_ils.py index 1906f730c..d9cf67ecc 100644 --- a/kernel_tuner/strategies/greedy_ils.py +++ b/kernel_tuner/strategies/greedy_ils.py @@ -1,9 +1,10 @@ """A simple greedy iterative local search algorithm for parameter search.""" -from kernel_tuner import util +from random import choice as random_choice + +from kernel_tuner.util import StopCriterionReached from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc -from kernel_tuner.strategies.genetic_algorithm import mutate from kernel_tuner.strategies.hillclimbers import base_hillclimb _options = dict(neighbor=("Method for selecting neighboring nodes, choose from Hamming or adjacent", "Hamming"), @@ -40,7 +41,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): try: candidate = base_hillclimb(candidate, neighbor, max_fevals, searchspace, tuning_options, cost_func, restart=restart, randomize=True) new_score = cost_func(candidate, check_restrictions=False) - except util.StopCriterionReached as e: + except 
StopCriterionReached as e: if tuning_options.verbose: print(e) return cost_func.results @@ -58,9 +59,13 @@ def tune(searchspace: Searchspace, runner, tuning_options): tune.__doc__ = common.get_strategy_docstring("Greedy Iterative Local Search (ILS)", _options) +def mutate(indiv, searchspace: Searchspace): + return list(searchspace.get_random_neighbor(tuple(indiv), neighbor_method="Hamming")) + + def random_walk(indiv, permutation_size, no_improve, last_improve, searchspace: Searchspace): if last_improve >= no_improve: return searchspace.get_random_sample(1)[0] for _ in range(permutation_size): - indiv = mutate(indiv, 0, searchspace, cache=False) + indiv = mutate(indiv, searchspace) return indiv diff --git a/kernel_tuner/strategies/greedy_mls.py b/kernel_tuner/strategies/greedy_mls.py index a651e11d7..4edd2f0a4 100644 --- a/kernel_tuner/strategies/greedy_mls.py +++ b/kernel_tuner/strategies/greedy_mls.py @@ -1,5 +1,5 @@ """A greedy multi-start local search algorithm for parameter search.""" -from kernel_tuner import util +from kernel_tuner.util import StopCriterionReached from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.hillclimbers import base_hillclimb @@ -30,7 +30,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): while fevals < max_fevals: try: base_hillclimb(candidate, neighbor, max_fevals, searchspace, tuning_options, cost_func, restart=restart, randomize=randomize, order=order) - except util.StopCriterionReached as e: + except StopCriterionReached as e: if tuning_options.verbose: print(e) return cost_func.results diff --git a/kernel_tuner/strategies/minimize.py b/kernel_tuner/strategies/minimize.py index 80c1c6f82..71929a040 100644 --- a/kernel_tuner/strategies/minimize.py +++ b/kernel_tuner/strategies/minimize.py @@ -2,7 +2,7 @@ import scipy.optimize -from kernel_tuner import util +from kernel_tuner.util import StopCriterionReached from kernel_tuner.searchspace import 
Searchspace from kernel_tuner.strategies.common import ( CostFunc, @@ -30,7 +30,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): opt_result = None try: opt_result = scipy.optimize.minimize(cost_func, x0, method=method, options=options, **kwargs) - except util.StopCriterionReached as e: + except StopCriterionReached as e: if tuning_options.verbose: print(e) diff --git a/kernel_tuner/strategies/pso.py b/kernel_tuner/strategies/pso.py index 0834f52c0..ec7efc2ee 100644 --- a/kernel_tuner/strategies/pso.py +++ b/kernel_tuner/strategies/pso.py @@ -1,29 +1,33 @@ """The strategy that uses particle swarm optimization.""" + import random import sys import numpy as np -from kernel_tuner import util +from kernel_tuner.util import StopCriterionReached from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc, scale_from_params -_options = dict(popsize=("Population size", 20), - maxiter=("Maximum number of iterations", 100), - w=("Inertia weight constant", 0.5), - c1=("Cognitive constant", 2.0), - c2=("Social constant", 1.0)) +_options = dict( + popsize=("Population size", 30), + maxiter=("Maximum number of iterations", 100), + w=("Inertia weight constant", 0.5), + c1=("Cognitive constant", 3.0), + c2=("Social constant", 0.5), + constraint_aware=("constraint-aware optimization (True/False)", True)) def tune(searchspace: Searchspace, runner, tuning_options): - #scale variables in x because PSO works with velocities to visit different configurations + # scale variables in x because PSO works with velocities to visit different configurations cost_func = CostFunc(searchspace, tuning_options, runner, scaling=True) - #using this instead of get_bounds because scaling is used + # using this instead of get_bounds because scaling is used bounds, x0, eps = cost_func.get_bounds_x0_eps() - num_particles, maxiter, w, c1, c2 = common.get_options(tuning_options.strategy_options, _options) + 
num_particles, maxiter, w, c1, c2, constraint_aware = common.get_options(tuning_options.strategy_options, _options) + num_particles = min(round(searchspace.size / 2), num_particles) best_score_global = sys.float_info.max best_position_global = [] @@ -34,9 +38,10 @@ def tune(searchspace: Searchspace, runner, tuning_options): swarm.append(Particle(bounds)) # ensure particles start from legal points - population = list(list(p) for p in searchspace.get_random_sample(num_particles)) - for i, particle in enumerate(swarm): - particle.position = scale_from_params(population[i], searchspace.tune_params, eps) + if constraint_aware: + population = list(list(p) for p in searchspace.get_random_sample(num_particles)) + for i, particle in enumerate(swarm): + particle.position = scale_from_params(population[i], searchspace.tune_params, eps) # include user provided starting point swarm[0].position = x0 @@ -50,7 +55,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): for j in range(num_particles): try: swarm[j].evaluate(cost_func) - except util.StopCriterionReached as e: + except StopCriterionReached as e: if tuning_options.verbose: print(e) return cost_func.results @@ -66,7 +71,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): swarm[j].update_position(bounds) if tuning_options.verbose: - print('Final result:') + print("Final result:") print(best_position_global) print(best_score_global) @@ -75,6 +80,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): tune.__doc__ = common.get_strategy_docstring("Particle Swarm Optimization (PSO)", _options) + class Particle: def __init__(self, bounds): self.ndim = len(bounds) diff --git a/kernel_tuner/strategies/pyatf_strategies.py b/kernel_tuner/strategies/pyatf_strategies.py new file mode 100644 index 000000000..897b3b5b6 --- /dev/null +++ b/kernel_tuner/strategies/pyatf_strategies.py @@ -0,0 +1,125 @@ +"""Strategy that dynamically imports and enables the use of pyATF strategies.""" + +from importlib 
import import_module +import zlib +from pathlib import Path + +from kernel_tuner.searchspace import Searchspace +from kernel_tuner.strategies import common +from kernel_tuner.strategies.common import CostFunc +from kernel_tuner.util import StopCriterionReached + +supported_searchtechniques = ["auc_bandit", "differential_evolution", "pattern_search", "round_robin", "simulated_annealing", "torczon"] + +_options = dict( + searchtechnique=(f"PyATF optimization algorithm to use, choose any from {supported_searchtechniques}", "simulated_annealing"), + use_searchspace_cache=(f"Use a cached search space if available, otherwise create a new one.", False) +) + +def get_cache_checksum(d: dict): + checksum=0 + for item in d.items(): + c1 = 1 + for t in item: + c1 = zlib.adler32(bytes(repr(t),'utf-8'), c1) + checksum=checksum ^ c1 + return checksum + +def tune(searchspace: Searchspace, runner, tuning_options): + from pyatf.search_techniques.search_technique import SearchTechnique + from pyatf.search_space import SearchSpace as pyATFSearchSpace + from pyatf import TP + + # get the search technique module name and whether to use search space caching + module_name, use_searchspace_cache = common.get_options(tuning_options.strategy_options, _options) + try: + if use_searchspace_cache: + import dill + pyatf_search_space_caching = use_searchspace_cache + except ImportError: + from warnings import warn + pyatf_search_space_caching = False + warn("dill is not installed, pyATF search space caching will not be used.") + + # setup the Kernel Tuner functionalities + cost_func = CostFunc(searchspace, tuning_options, runner, scaling=False, snap=False, return_invalid=True) + + # dynamically import the search technique based on the provided options + module = import_module(f"pyatf.search_techniques.{module_name}") + class_name = [d for d in dir(module) if d.lower() == module_name.replace('_','')][0] + searchtechnique_class = getattr(module, class_name) + + # instantiate the search technique + 
search_technique = searchtechnique_class() + search_technique.initialize(len(searchspace.param_names)) + assert isinstance(search_technique, SearchTechnique), f"Search technique {search_technique} is not a valid pyATF search technique." + + # get the search space hash + tune_params_hashable = {k: ",".join([str(i) for i in v]) if isinstance(v, (list, tuple)) else v for k, v in searchspace.tune_params.items()} + searchspace_caches_folder = Path("./pyatf_searchspace_caches") + searchspace_caches_folder.mkdir(parents=True, exist_ok=True) + searchspace_cache_path = searchspace_caches_folder / Path(f"pyatf_searchspace_cache_{get_cache_checksum(tune_params_hashable)}.pkl") + + # initialize the search space + if not pyatf_search_space_caching or not searchspace_cache_path.exists(): + searchspace_pyatf = Searchspace( + searchspace.tune_params, + tuning_options.restrictions_unmodified, + searchspace.max_threads, + searchspace.block_size_names, + defer_construction=True, + framework="pyatf" + ) + tune_params_pyatf = searchspace_pyatf.get_tune_params_pyatf() + assert isinstance(tune_params_pyatf, (tuple, list)), f"Tuning parameters must be a tuple or list of tuples, is {type(tune_params_pyatf)} ({tune_params_pyatf})." 
+ search_space_pyatf = pyATFSearchSpace(*tune_params_pyatf, enable_1d_access=False) # SearchTechnique1D currently not supported + if pyatf_search_space_caching: + dill.dump(search_space_pyatf, open(searchspace_cache_path, "wb")) + elif searchspace_cache_path.exists(): + search_space_pyatf = dill.load(open(searchspace_cache_path, "rb")) + + # initialize + get_next_coordinates_or_indices = search_technique.get_next_coordinates + coordinates_or_indices = set() # Set[Union[Coordinates, Index]] + costs = {} # Dict[Union[Coordinates, Index], Cost] + eval_count = 0 + + try: + # optimization loop (KT-compatible re-implementation of `make_step` from TuningRun) + while eval_count < searchspace.size: + + # get new coordinates + if not coordinates_or_indices: + if costs: + search_technique.report_costs(costs) + costs.clear() + coordinates_or_indices.update(get_next_coordinates_or_indices()) + + # get configuration + coords_or_index = coordinates_or_indices.pop() + config = search_space_pyatf.get_configuration(coords_or_index) + valid = True + cost = None + + # evaluate the configuration + x = tuple([config[k] for k in searchspace.tune_params.keys()]) + opt_result = cost_func(x, check_restrictions=False) + + # adjust opt_result to expected PyATF output in cost and valid + if not isinstance(opt_result, (int, float)): + valid = False + else: + cost = opt_result + eval_count += 1 + + # record the evaluation + costs[coords_or_index] = cost + except StopCriterionReached: + pass + finally: + search_technique.finalize() + + return cost_func.results + + +tune.__doc__ = common.get_strategy_docstring("pyatf_strategies", _options) diff --git a/kernel_tuner/strategies/random_sample.py b/kernel_tuner/strategies/random_sample.py index 86caccfa3..33b5075d3 100644 --- a/kernel_tuner/strategies/random_sample.py +++ b/kernel_tuner/strategies/random_sample.py @@ -1,7 +1,7 @@ """Iterate over a random sample of the parameter space.""" import numpy as np -from kernel_tuner import util +from 
kernel_tuner.util import StopCriterionReached from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc @@ -26,7 +26,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): for sample in samples: try: cost_func(sample, check_restrictions=False) - except util.StopCriterionReached as e: + except StopCriterionReached as e: if tuning_options.verbose: print(e) return cost_func.results diff --git a/kernel_tuner/strategies/simulated_annealing.py b/kernel_tuner/strategies/simulated_annealing.py index 80162b487..ee4f1355c 100644 --- a/kernel_tuner/strategies/simulated_annealing.py +++ b/kernel_tuner/strategies/simulated_annealing.py @@ -4,22 +4,24 @@ import numpy as np -from kernel_tuner import util +from kernel_tuner.util import StopCriterionReached, ErrorConfig from kernel_tuner.searchspace import Searchspace from kernel_tuner.strategies import common from kernel_tuner.strategies.common import CostFunc -_options = dict(T=("Starting temperature", 1.0), - T_min=("End temperature", 0.001), - alpha=("Alpha parameter", 0.995), - maxiter=("Number of iterations within each annealing step", 1)) + +_options = dict(T=("Starting temperature", 0.5), + T_min=("End temperature", 0.001), + alpha=("Alpha parameter", 0.9975), + maxiter=("Number of iterations within each annealing step", 2), + constraint_aware=("constraint-aware optimization (True/False)", True)) def tune(searchspace: Searchspace, runner, tuning_options): # SA works with real parameter values and does not need scaling - cost_func = CostFunc(searchspace, tuning_options, runner) + cost_func = CostFunc(searchspace, tuning_options, runner, return_invalid=True) # optimization parameters - T, T_min, alpha, niter = common.get_options(tuning_options.strategy_options, _options) + T, T_min, alpha, niter, constraint_aware = common.get_options(tuning_options.strategy_options, _options) T_start = T # compute how many iterations would be 
needed to complete the annealing schedule @@ -34,7 +36,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): # get random starting point and evaluate cost pos = cost_func.get_start_pos() - old_cost = cost_func(pos, check_restrictions=False) + old_cost = cost_func(pos, check_restrictions=not constraint_aware) # main optimization loop stuck = 0 @@ -49,10 +51,10 @@ def tune(searchspace: Searchspace, runner, tuning_options): for _ in range(niter): - new_pos = neighbor(pos, searchspace) + new_pos = neighbor(pos, searchspace, constraint_aware) try: - new_cost = cost_func(new_pos, check_restrictions=False) - except util.StopCriterionReached as e: + new_cost = cost_func(new_pos, check_restrictions=not constraint_aware) + except StopCriterionReached as e: if tuning_options.verbose: print(e) return cost_func.results @@ -76,7 +78,7 @@ def tune(searchspace: Searchspace, runner, tuning_options): stuck = 0 c_old = c if stuck > 100: - pos = list(searchspace.get_random_sample(1)[0]) + pos = generate_starting_point(searchspace, constraint_aware) stuck = 0 # safeguard @@ -90,13 +92,12 @@ def tune(searchspace: Searchspace, runner, tuning_options): def acceptance_prob(old_cost, new_cost, T, tuning_options): """Annealing equation, with modifications to work towards a lower value.""" - error_val = sys.float_info.max res = 0.0 # if start pos is not valid, always move - if old_cost == error_val: + if isinstance(old_cost, ErrorConfig): res = 1.0 # if we have found a valid ps before, never move to nonvalid pos - elif new_cost == error_val: + elif isinstance(new_cost, ErrorConfig): res = 0.0 # always move if new cost is better elif new_cost < old_cost: @@ -110,11 +111,60 @@ def acceptance_prob(old_cost, new_cost, T, tuning_options): return res -def neighbor(pos, searchspace: Searchspace): +def neighbor(pos, searchspace: Searchspace, constraint_aware=True): """Return a random neighbor of pos.""" - # Note: this is not the same as the previous implementation, because it is possible 
that non-edge parameters remain the same, but suggested configurations will all be within restrictions - neighbors = searchspace.get_neighbors(tuple(pos), neighbor_method='Hamming') if random.random() < 0.2 else searchspace.get_neighbors(tuple(pos), neighbor_method='strictly-adjacent') - if len(neighbors) > 0: - return list(random.choice(neighbors)) - # if there are no neighbors, return a random configuration - return list(searchspace.get_random_sample(1)[0]) + + def random_neighbor(pos, method): + """Helper method to return a random neighbor.""" + neighbor = searchspace.get_random_neighbor(pos, neighbor_method=method) + if neighbor is None: + return pos + return neighbor + + size = len(pos) + + if constraint_aware: + pos = tuple(pos) + + # Note: the following tries to mimick as much as possible the earlier version of SA but in a constraint-aware version + for i in range(size): + if random.random() < 0.2: + pos = random_neighbor(pos, 'Hamming') + pos = random_neighbor(pos, 'adjacent') + + return list(pos) + + else: + tune_params = searchspace.tune_params + pos_out = [] + # random mutation + # expected value is set that values all dimensions attempt to get mutated + for i in range(size): + key = list(tune_params.keys())[i] + values = tune_params[key] + + if random.random() < 0.2: #replace with random value + new_value = random_val(i, tune_params) + else: #adjacent value + ind = values.index(pos[i]) + if random.random() > 0.5: + ind += 1 + else: + ind -= 1 + ind = min(max(ind, 0), len(values)-1) + new_value = values[ind] + + pos_out.append(new_value) + return pos_out + +def random_val(index, tune_params): + """return a random value for a parameter""" + key = list(tune_params.keys())[index] + return random.choice(tune_params[key]) + +def generate_starting_point(searchspace: Searchspace, constraint_aware=True): + if constraint_aware: + return list(searchspace.get_random_sample(1)[0]) + else: + tune_params = searchspace.tune_params + return [random_val(i, tune_params) 
for i in range(len(tune_params))] diff --git a/kernel_tuner/strategies/wrapper.py b/kernel_tuner/strategies/wrapper.py new file mode 100644 index 000000000..d6d91f2dd --- /dev/null +++ b/kernel_tuner/strategies/wrapper.py @@ -0,0 +1,49 @@ +"""Wrapper intended for user-defined custom optimization methods""" + +from abc import ABC, abstractmethod + +from kernel_tuner import util +from kernel_tuner.searchspace import Searchspace +from kernel_tuner.strategies.common import CostFunc + + +class OptAlg(ABC): + """Base class for user-defined optimization algorithms.""" + + def __init__(self): + self.costfunc_kwargs = {"scaling": False, "snap": False} + + @abstractmethod + def __call__(self, func: CostFunc, searchspace: Searchspace) -> tuple[tuple, float]: + """Optimize the black box function `func` within the given `searchspace`. + + Args: + func (CostFunc): Cost function to be optimized. Has a property `budget_spent_fraction` that indicates how much of the budget has been spent. + searchspace (Searchspace): Search space containing the parameters to be optimized. 
+ + Returns: + tuple[tuple, float]: tuple of the best parameters and the corresponding cost value + """ + pass + + +class OptAlgWrapper: + """Wrapper class for user-defined optimization algorithms""" + + def __init__(self, optimizer: OptAlg): + self.optimizer: OptAlg = optimizer + + def tune(self, searchspace: Searchspace, runner, tuning_options): + cost_func = CostFunc(searchspace, tuning_options, runner, **self.optimizer.costfunc_kwargs) + + if self.optimizer.costfunc_kwargs.get('scaling', False): + # Initialize costfunc for scaling + cost_func.get_bounds_x0_eps() + + try: + self.optimizer(cost_func, searchspace) + except util.StopCriterionReached as e: + if tuning_options.verbose: + print(e) + + return cost_func.results diff --git a/kernel_tuner/util.py b/kernel_tuner/util.py index bf067c71c..2d9e3f1b3 100644 --- a/kernel_tuner/util.py +++ b/kernel_tuner/util.py @@ -191,12 +191,29 @@ def check_argument_list(kernel_name, kernel_string, args): warnings.warn(errors[0], UserWarning) -def check_stop_criterion(to): - """Checks if max_fevals is reached or time limit is exceeded.""" - if "max_fevals" in to and len(to.unique_results) >= to.max_fevals: - raise StopCriterionReached("max_fevals reached") - if "time_limit" in to and (((time.perf_counter() - to.start_time) + (to.simulated_time * 1e-3) + to.startup_time) > to.time_limit): - raise StopCriterionReached("time limit exceeded") +def check_stop_criterion(to: dict) -> float: + """Check if the stop criterion is reached. + + Args: + to (dict): tuning options. + + Raises: + StopCriterionReached: if the max_fevals is reached or time limit is exceeded. + + Returns: + float: fraction of budget spent. If both max_fevals and time_limit are set, it returns the fraction of time. 
+ """ + if "max_fevals" in to: + if len(to.unique_results) >= to.max_fevals: + raise StopCriterionReached(f"max_fevals ({to.max_fevals}) reached") + if not "time_limit" in to: + return len(to.unique_results) / to.max_fevals + if "time_limit" in to: + time_spent = (time.perf_counter() - to.start_time) + (to.simulated_time * 1e-3) + to.startup_time + if time_spent > to.time_limit: + raise StopCriterionReached("time limit exceeded") + return time_spent / to.time_limit + def check_tune_params_list(tune_params, observers, simulation_mode=False): @@ -479,13 +496,13 @@ def get_interval(a: list): a_min = min(a) a_max = max(a) if len(a) <= 2: - return (a_min, a_max, a_max-a_min) + return (a_min, a_max, a_max - a_min) # determine the first step size - step = a[1]-a_min + step = a[1] - a_min # for each element, the step size should be equal to the first step for i, e in enumerate(a): - if e-a[i-1] != step: - return None + if e - a[i - 1] != step: + return None result = (a_min, a_max, step) if not all(isinstance(e, (int, float)) for e in result): return None @@ -910,8 +927,8 @@ def replace_params_split(match_object): # remove functionally duplicate restrictions (preserves order and whitespace) if all(isinstance(r, str) for r in restrictions): # clean the restriction strings to functional equivalence - restrictions_cleaned = [r.replace(' ', '') for r in restrictions] - restrictions_cleaned_unique = list(dict.fromkeys(restrictions_cleaned)) # dict preserves order + restrictions_cleaned = [r.replace(" ", "") for r in restrictions] + restrictions_cleaned_unique = list(dict.fromkeys(restrictions_cleaned)) # dict preserves order # get the indices of the unique restrictions, use these to build a new list of restrictions restrictions_unique_indices = [restrictions_cleaned.index(r) for r in restrictions_cleaned_unique] restrictions = [restrictions[i] for i in restrictions_unique_indices] @@ -952,8 +969,12 @@ def replace_params_split(match_object): # combine multiple restrictions into 
one for res_tuple in res_dict.values(): res, params_used = res_tuple - params_used = list(dict.fromkeys(params_used)) # param_used should only contain unique, dict preserves order - parsed_restrictions_pyatf.append((f"def r({', '.join(params_used)}): return ({') and ('.join(res)}) \n", params_used)) + params_used = list( + dict.fromkeys(params_used) + ) # param_used should only contain unique, dict preserves order + parsed_restrictions_pyatf.append( + (f"def r({', '.join(params_used)}): return ({') and ('.join(res)}) \n", params_used) + ) parsed_restrictions = parsed_restrictions_pyatf else: # create one monolithic function @@ -1126,7 +1147,9 @@ def compile_restrictions( def check_matching_problem_size(cached_problem_size, problem_size): """Check the if requested problem size matches the problem size in the cache.""" - if not (np.array(cached_problem_size) == np.array(problem_size)).all(): + cached_problem_size_arr = np.array(cached_problem_size) + problem_size_arr = np.array(problem_size) + if cached_problem_size_arr.size != problem_size_arr.size or not (cached_problem_size_arr == problem_size_arr).all(): raise ValueError(f"Cannot load cache which contains results for different problem_size, cache: {cached_problem_size}, requested: {problem_size}") def process_cache(cache, kernel_options, tuning_options, runner): @@ -1208,7 +1231,7 @@ def process_cache(cache, kernel_options, tuning_options, runner): ) raise ValueError( f"Cannot load cache which contains results obtained with different tunable parameters. 
\ - Cache has: {cached_data['tune_params_keys']}, tuning_options has: {list(tuning_options.tune_params.keys())}" + Cache at '{cache}' has: {cached_data['tune_params_keys']}, tuning_options has: {list(tuning_options.tune_params.keys())}" ) tuning_options.cachefile = cache diff --git a/pyproject.toml b/pyproject.toml index a5e3f0522..ffc0583be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api" [project] name = "kernel_tuner" description = "An easy to use CUDA/OpenCL kernel tuner in Python" -version = "1.2.0" # adhere to PEP440 versioning: https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#id55 +version = "1.3.0" # adhere to PEP440 versioning: https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#id55 readme = "README.md" license = "Apache-2.0" authors = [ @@ -54,7 +54,7 @@ dependencies = [ "scipy>=1.14.1", # Python >=3.13 needs scipy >=1.14 "packaging", # required by file_utils "jsonschema", - "python-constraint2>=2.3.1", + "python-constraint2>=2.4.0", "xmltodict", "pandas>=2.0.0", "scikit-learn>=1.0.2", @@ -125,7 +125,7 @@ pytest-cov = "^5.0.0" mock = "^5.1.0" nox = "^2024.4.15" nox-poetry = "^1.0.3" -ruff = "^0.4.4" +ruff = "^0.4.8" pep440 = "^0.1.2" tomli = "^2.0.1" # held back by Python <= 3.10, can be replaced by built-in [tomllib](https://docs.python.org/3.11/library/tomllib.html) from Python 3.11 onwards @@ -154,3 +154,5 @@ select = [ ] [tool.ruff.pydocstyle] convention = "google" +[tool.ruff.lint] +select = ["NPY201"] diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 000000000..1539a6cdf --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +def pytest_collection_modifyitems(items): + for item in items: + if item.get_closest_marker('timeout') is None: + item.add_marker(pytest.mark.timeout(60)) \ No newline at end of file diff --git a/test/context.py b/test/context.py index afbd68e3e..bad152986 
100644 --- a/test/context.py +++ b/test/context.py @@ -1,7 +1,6 @@ -import sys -import subprocess import shutil -import os +import subprocess +import sys import pytest @@ -48,7 +47,7 @@ try: import cuda - + print(cuda) cuda_present = True except Exception: cuda_present = False @@ -60,6 +59,26 @@ except (ImportError, RuntimeError): hip_present = False +try: + import botorch + import torch + bayes_opt_botorch_present = True +except ImportError: + bayes_opt_botorch_present = False + +try: + import gpytorch + import torch + bayes_opt_gpytorch_present = True +except ImportError: + bayes_opt_gpytorch_present = False + +try: + import pyatf + pyatf_present = True +except ImportError: + pyatf_present = False + try: from autotuning_methodology.report_experiments import get_strategy_scores methodology_present = True @@ -85,7 +104,10 @@ ) skip_if_no_openmp = pytest.mark.skipif(not openmp_present, reason="No OpenMP found") skip_if_no_openacc = pytest.mark.skipif(not openacc_present, reason="No nvc++ on PATH") -skip_if_no_hip = pytest.mark.skipif(not hip_present, reason="No HIP Python found or no HIP device detected") +skip_if_no_bayesopt_gpytorch = pytest.mark.skipif(not bayes_opt_gpytorch_present, reason="Torch and GPyTorch not installed") +skip_if_no_bayesopt_botorch = pytest.mark.skipif(not bayes_opt_botorch_present, reason="Torch and BOTorch not installed") +skip_if_no_hip = pytest.mark.skipif(not hip_present, reason="No HIP Python found") +skip_if_no_pyatf = pytest.mark.skipif(not pyatf_present, reason="PyATF not installed") skip_if_no_methodology = pytest.mark.skipif(not methodology_present, reason="Autotuning Methodology not found") diff --git a/test/strategies/__init__.py b/test/strategies/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/test/strategies/test_cache_file.json b/test/strategies/test_cache_file.json new file mode 100644 index 000000000..6073d4b62 --- /dev/null +++ b/test/strategies/test_cache_file.json @@ -0,0 +1,394 @@ +{ + 
"device_name": "NVIDIA RTX A4000", + "kernel_name": "vector_add", + "tune_params_keys": [ + "block_size_x", + "test_string", + "test_single", + "test_bool", + "test_mixed" + ], + "tune_params": { + "block_size_x": [ + 128, + 192, + 256, + 320, + 384, + 448, + 512, + 576, + 640, + 704, + 768, + 832, + 896, + 960, + 1024 + ], + "test_string": [ + "alg_1", + "alg_2" + ], + "test_single": [ + 15 + ], + "test_bool": [ + true, + false + ], + "test_mixed": [ + "test", + 1, + true, + 2.45 + ] + }, + "cache": { + "128,alg_2,15,True,2.45": { + "block_size_x": 128, + "test_string": "alg_2", + "test_single": 15, + "test_bool": true, + "test_mixed": 2.45, + "time": 0.04073600071881499, + "times": [ + 0.1268800050020218, + 0.031072000041604042, + 0.027295999228954315, + 0.025472000241279602, + 0.025119999423623085, + 0.025248000398278236, + 0.024064000695943832 + ], + "compile_time": 440.9545585513115, + "verification_time": 0, + "benchmark_time": 1.091592013835907, + "strategy_time": 0, + "framework_time": 0.8587837219238281, + "timestamp": "2022-12-23 12:11:26.411558+00:00" + }, + "192,alg_2,15,True,2.45": { + "block_size_x": 192, + "test_string": "alg_2", + "test_single": 15, + "test_bool": true, + "test_mixed": 2.45, + "time": 0.04095085710287094, + "times": [ + 0.12908799946308136, + 0.03046399913728237, + 0.027744000777602196, + 0.025151999667286873, + 0.024960000067949295, + 0.024992000311613083, + 0.02425600029528141 + ], + "compile_time": 436.15153804421425, + "verification_time": 0, + "benchmark_time": 1.0972395539283752, + "strategy_time": 0, + "framework_time": 1.6656816005706787, + "timestamp": "2022-12-23 12:11:26.411558+00:00" + }, + "256,alg_2,15,True,2.45": { + "block_size_x": 256, + "test_string": "alg_2", + "test_single": 15, + "test_bool": true, + "test_mixed": 2.45, + "time": 0.04189257137477398, + "times": [ + 0.13180799782276154, + 0.031136000528931618, + 0.028095999732613564, + 0.027008000761270523, + 0.025087999179959297, + 0.02505600079894066, + 
0.02505600079894066 + ], + "compile_time": 436.5839697420597, + "verification_time": 0, + "benchmark_time": 1.0691732168197632, + "strategy_time": 0, + "framework_time": 1.6054585576057434, + "timestamp": "2022-12-23 12:11:26.411558+00:00" + }, + "320,alg_2,15,True,2.45": { + "block_size_x": 320, + "test_string": "alg_2", + "test_single": 15, + "test_bool": true, + "test_mixed": 2.45, + "time": 0.04208914376795292, + "times": [ + 0.1358720064163208, + 0.030688000842928886, + 0.02768000029027462, + 0.02582399919629097, + 0.025087999179959297, + 0.025312000885605812, + 0.024159999564290047 + ], + "compile_time": 438.9761835336685, + "verification_time": 0, + "benchmark_time": 1.0976120829582214, + "strategy_time": 0, + "framework_time": 1.4494173228740692, + "timestamp": "2022-12-23 12:11:26.411558+00:00" + }, + "384,alg_2,15,True,2.45": { + "block_size_x": 384, + "test_string": "alg_2", + "test_single": 15, + "test_bool": true, + "test_mixed": 2.45, + "time": 0.04174171467976911, + "times": [ + 0.13251200318336487, + 0.03167999908328056, + 0.027871999889612198, + 0.025312000885605812, + 0.024671999737620354, + 0.02505600079894066, + 0.025087999179959297 + ], + "compile_time": 440.71199372410774, + "verification_time": 0, + "benchmark_time": 1.0499358177185059, + "strategy_time": 0, + "framework_time": 1.682564616203308, + "timestamp": "2022-12-23 12:11:26.411558+00:00" + }, + "448,alg_2,15,True,2.45": { + "block_size_x": 448, + "test_string": "alg_2", + "test_single": 15, + "test_bool": true, + "test_mixed": 2.45, + "time": 0.03249828570655414, + "times": [ + 0.0647680014371872, + 0.03167999908328056, + 0.028255999088287354, + 0.025280000641942024, + 0.027103999629616737, + 0.02550400048494339, + 0.02489599958062172 + ], + "compile_time": 449.13655519485474, + "verification_time": 0, + "benchmark_time": 1.1196956038475037, + "strategy_time": 0, + "framework_time": 1.5890561044216156, + "timestamp": "2022-12-23 12:11:26.411558+00:00" + }, + "512,alg_2,15,True,2.45": 
{ + "block_size_x": 512, + "test_string": "alg_2", + "test_single": 15, + "test_bool": true, + "test_mixed": 2.45, + "time": 0.04139885599059718, + "times": [ + 0.13023999333381653, + 0.031136000528931618, + 0.02831999957561493, + 0.02595200017094612, + 0.024607999250292778, + 0.025151999667286873, + 0.024383999407291412 + ], + "compile_time": 440.5844733119011, + "verification_time": 0, + "benchmark_time": 1.09076127409935, + "strategy_time": 0, + "framework_time": 1.853298395872116, + "timestamp": "2022-12-23 12:11:26.411558+00:00" + }, + "576,alg_2,15,True,2.45": { + "block_size_x": 576, + "test_string": "alg_2", + "test_single": 15, + "test_bool": true, + "test_mixed": 2.45, + "time": 0.04189257137477398, + "times": [ + 0.12995199859142303, + 0.03200000151991844, + 0.028511999174952507, + 0.026623999699950218, + 0.025760000571608543, + 0.02537599951028824, + 0.02502400055527687 + ], + "compile_time": 442.16764718294144, + "verification_time": 0, + "benchmark_time": 1.1038780212402344, + "strategy_time": 0, + "framework_time": 1.8403716385364532, + "timestamp": "2022-12-23 12:11:26.411558+00:00" + }, + "640,alg_2,15,True,2.45": { + "block_size_x": 640, + "test_string": "alg_2", + "test_single": 15, + "test_bool": true, + "test_mixed": 2.45, + "time": 0.0411702852163996, + "times": [ + 0.12796799838542938, + 0.03081599995493889, + 0.02969600073993206, + 0.025439999997615814, + 0.02409599907696247, + 0.02582399919629097, + 0.024351999163627625 + ], + "compile_time": 437.98910081386566, + "verification_time": 0, + "benchmark_time": 1.0496266186237335, + "strategy_time": 0, + "framework_time": 1.8264725804328918, + "timestamp": "2022-12-23 12:11:26.411558+00:00" + }, + "704,alg_2,15,True,2.45": { + "block_size_x": 704, + "test_string": "alg_2", + "test_single": 15, + "test_bool": true, + "test_mixed": 2.45, + "time": 0.04184228580977235, + "times": [ + 0.1343040019273758, + 0.03094400092959404, + 0.02908799983561039, + 0.025151999667286873, + 0.02486399933695793, + 
0.024447999894618988, + 0.02409599907696247 + ], + "compile_time": 443.51235404610634, + "verification_time": 0, + "benchmark_time": 1.1033527553081512, + "strategy_time": 0, + "framework_time": 1.6709677875041962, + "timestamp": "2022-12-23 12:11:26.411558+00:00" + }, + "768,alg_2,15,True,2.45": { + "block_size_x": 768, + "test_string": "alg_2", + "test_single": 15, + "test_bool": true, + "test_mixed": 2.45, + "time": 0.03175771422684193, + "times": [ + 0.06230400130152702, + 0.0315839983522892, + 0.02831999957561493, + 0.02672000043094158, + 0.023679999634623528, + 0.023903999477624893, + 0.02579200081527233 + ], + "compile_time": 450.4409395158291, + "verification_time": 0, + "benchmark_time": 1.101326197385788, + "strategy_time": 0, + "framework_time": 1.7531625926494598, + "timestamp": "2022-12-23 12:11:26.411558+00:00" + }, + "832,alg_2,15,True,2.45": { + "block_size_x": 832, + "test_string": "alg_2", + "test_single": 15, + "test_bool": true, + "test_mixed": 2.45, + "time": 0.040941715240478516, + "times": [ + 0.12998400628566742, + 0.03094400092959404, + 0.027103999629616737, + 0.024768000468611717, + 0.025439999997615814, + 0.023903999477624893, + 0.024447999894618988 + ], + "compile_time": 439.9200603365898, + "verification_time": 0, + "benchmark_time": 1.0421127080917358, + "strategy_time": 0, + "framework_time": 2.1368376910686493, + "timestamp": "2022-12-23 12:11:26.411558+00:00" + }, + "896,alg_2,15,True,2.45": { + "block_size_x": 896, + "test_string": "alg_2", + "test_single": 15, + "test_bool": true, + "test_mixed": 2.45, + "time": 0.04177371359297207, + "times": [ + 0.12931199371814728, + 0.03731200098991394, + 0.02812799997627735, + 0.02502400055527687, + 0.02412799932062626, + 0.024768000468611717, + 0.023744000121951103 + ], + "compile_time": 439.23527002334595, + "verification_time": 0, + "benchmark_time": 1.0946877300739288, + "strategy_time": 0, + "framework_time": 2.03637033700943, + "timestamp": "2022-12-23 12:11:26.411558+00:00" + }, + 
"960,alg_2,15,True,2.45": { + "block_size_x": 960, + "test_string": "alg_2", + "test_single": 15, + "test_bool": true, + "test_mixed": 2.45, + "time": 0.042189714631864, + "times": [ + 0.1335040032863617, + 0.031039999797940254, + 0.02876799926161766, + 0.02579200081527233, + 0.025119999423623085, + 0.02566399984061718, + 0.025439999997615814 + ], + "compile_time": 441.7596235871315, + "verification_time": 0, + "benchmark_time": 1.1166557669639587, + "strategy_time": 0, + "framework_time": 1.7383433878421783, + "timestamp": "2022-12-23 12:11:26.411558+00:00" + }, + "1024,alg_2,15,True,2.45": { + "block_size_x": 1024, + "test_string": "alg_2", + "test_single": 15, + "test_bool": true, + "test_mixed": 2.45, + "time": 0.04114742816558906, + "times": [ + 0.13087999820709229, + 0.03049599938094616, + 0.027936000376939774, + 0.02486399933695793, + 0.0244159996509552, + 0.024320000782608986, + 0.025119999423623085 + ], + "compile_time": 442.8337663412094, + "verification_time": 0, + "benchmark_time": 1.0683201253414154, + "strategy_time": 0, + "framework_time": 1.9918642938137054, + "timestamp": "2022-12-23 12:11:26.411558+00:00" + } + } +} \ No newline at end of file diff --git a/test/strategies/test_diff_evo.py b/test/strategies/test_diff_evo.py new file mode 100644 index 000000000..4da12dfc6 --- /dev/null +++ b/test/strategies/test_diff_evo.py @@ -0,0 +1,189 @@ +import numpy as np +import pytest +from kernel_tuner.strategies.diff_evo import ( + values_to_indices, + indices_to_values, + mutate_de_1, + mutate_de_2, + binomial_crossover, + exponential_crossover, + parse_method, + mutation, + crossover, +) +from kernel_tuner.strategies.diff_evo import supported_methods +from kernel_tuner import tune_kernel + +from .test_strategies import vector_add, cache_filename + + +def test_values_to_indices(): + + tune_params = {} + tune_params["block_size_x"] = [16, 32, 128, 1024] + + result = values_to_indices([1024], tune_params) + expected = [3] + assert result[0] == expected[0] + 
assert len(result) == len(expected) + + tune_params["block_size_y"] = [16, 32, 128, 1024] + + result = values_to_indices([32, 128], tune_params) + expected = [1, 2] + assert result[0] == expected[0] + assert result[1] == expected[1] + assert len(result) == len(expected) + + +def test_indices_to_values(): + + tune_params = {} + tune_params["block_size_x"] = [16, 32, 128, 1024] + + expected = [1024] + result = indices_to_values([3], tune_params) + assert result[0] == expected[0] + assert len(result) == len(expected) + + tune_params["block_size_y"] = [16, 32, 128, 1024] + expected = [1024, 32] + result = indices_to_values([3, 1], tune_params) + assert result[0] == expected[0] + assert result[1] == expected[1] + assert len(result) == len(expected) + + +def test_mutate_de_1(): + + tune_params = {} + tune_params["block_size_x"] = [16, 32, 128, 256, 512, 1024] + tune_params["block_size_y"] = [1, 2, 8] + tune_params["block_size_z"] = [1, 2, 4, 8] + + a_idx = np.array([0, 1, 2]) + b_idx = np.array([4, 1, 0]) + c_idx = np.array([5, 0, 1]) + randos_idx = [a_idx, b_idx, c_idx] + + F = 0.8 + params_list = list(tune_params) + min_idx = np.zeros(len(tune_params)) + max_idx = [len(v) - 1 for v in tune_params.values()] + + mutant = mutate_de_1(a_idx, randos_idx, F, min_idx, max_idx, False) + + assert len(mutant) == len(a_idx) + + for dim, idx in enumerate(mutant): + assert isinstance(idx, np.integer) + assert min_idx[dim] <= idx <= max_idx[dim] + + mutant = mutate_de_1(a_idx, randos_idx[:-1], F, min_idx, max_idx, True) + + assert len(mutant) == len(a_idx) + + for dim, idx in enumerate(mutant): + assert isinstance(idx, np.integer) + assert min_idx[dim] <= idx <= max_idx[dim] + + +def test_mutate_de_2(): + + tune_params = {} + tune_params["block_size_x"] = [16, 32, 128, 256, 512, 1024] + tune_params["block_size_y"] = [1, 2, 8] + tune_params["block_size_z"] = [1, 2, 4, 8] + + a_idx = np.array([0, 1, 2]) + b_idx = np.array([4, 1, 0]) + c_idx = np.array([5, 0, 1]) + d_idx = np.array([3, 
2, 3]) + e_idx = np.array([1, 0, 3]) + randos_idx = [a_idx, b_idx, c_idx, d_idx, e_idx] + + F = 0.8 + params_list = list(tune_params) + min_idx = np.zeros(len(tune_params)) + max_idx = [len(v) - 1 for v in tune_params.values()] + + mutant = mutate_de_2(a_idx, randos_idx, F, min_idx, max_idx, False) + + assert len(mutant) == len(a_idx) + + for dim, idx in enumerate(mutant): + assert isinstance(idx, np.integer) + assert min_idx[dim] <= idx <= max_idx[dim] + + mutant = mutate_de_2(a_idx, randos_idx[:-1], F, min_idx, max_idx, True) + + assert len(mutant) == len(a_idx) + + for dim, idx in enumerate(mutant): + assert isinstance(idx, np.integer) + assert min_idx[dim] <= idx <= max_idx[dim] + + +def test_binomial_crossover(): + + donor_vector = np.array([1, 2, 3, 4, 5]) + target = np.array([6, 7, 8, 9, 10]) + CR = 0.8 + + result = binomial_crossover(donor_vector, target, CR) + assert len(result) == len(donor_vector) + + for dim, val in enumerate(result): + assert (val == donor_vector[dim]) or (val == target[dim]) + + +def test_exponential_crossover(): + + donor_vector = np.array([1, 2, 3, 4, 5]) + target = np.array([6, 7, 8, 9, 10]) + CR = 0.8 + + result = exponential_crossover(donor_vector, target, CR) + assert len(result) == len(donor_vector) + + for dim, val in enumerate(result): + assert (val == donor_vector[dim]) or (val == target[dim]) + + +def test_parse_method(): + + # check unsupported methods raise ValueError + for method in ["randtobest4bin", "bogus3log"]: + print(f"{method=}") + with pytest.raises(ValueError): + parse_method(method) + + # check if parses correctly + def check_result(result, expected): + assert len(result) == len(expected) + for i, res in enumerate(result): + assert res == expected[i] + + check_result(parse_method("rand1bin"), [False, 1, mutation["1"], crossover["bin"]]) + check_result(parse_method("best1exp"), [True, 1, mutation["1"], crossover["exp"]]) + check_result(parse_method("randtobest1exp"), [False, 1, mutation["randtobest"], 
crossover["exp"]]) + check_result(parse_method("currenttobest1bin"), [False, 1, mutation["currenttobest"], crossover["bin"]]) + + +@pytest.mark.parametrize("method", supported_methods) +def test_diff_evo(vector_add, method): + restrictions = [ + "test_string == 'alg_2'", + "test_bool == True", + "test_mixed == 2.45" + ] + result, _ = tune_kernel( + *vector_add, + restrictions=restrictions, + strategy="diff_evo", + strategy_options=dict(popsize=5, method=method), + verbose=True, + cache=cache_filename, + simulation_mode=True, + ) + assert len(result) > 0 diff --git a/test/strategies/test_genetic_algorithm.py b/test/strategies/test_genetic_algorithm.py index cb07f8d7f..d16ad11ce 100644 --- a/test/strategies/test_genetic_algorithm.py +++ b/test/strategies/test_genetic_algorithm.py @@ -14,10 +14,12 @@ def test_weighted_choice(): pop = searchspace.get_random_sample(pop_size) weighted_pop = [[p, i] for i, p in enumerate(pop)] - result = ga.weighted_choice(weighted_pop, 1) + GA = ga.GeneticAlgorithm(pop_size, searchspace) + + result = GA.weighted_choice(weighted_pop, 1) assert result[0] in pop - result = ga.weighted_choice(weighted_pop, 2) + result = GA.weighted_choice(weighted_pop, 2) print(result) assert result[0] in pop assert result[1] in pop @@ -41,9 +43,12 @@ def test_random_population(): def test_mutate(): - pop = searchspace.get_random_sample(1) - mutant = ga.mutate(pop[0], 10, searchspace) + GA = ga.GeneticAlgorithm(1, searchspace) + + pop = GA.generate_population() + + mutant = GA.mutate(pop[0]) assert len(pop[0]) == len(mutant) assert mutant[0] in tune_params["x"] assert mutant[1] in tune_params["y"] diff --git a/test/strategies/test_strategies.py b/test/strategies/test_strategies.py index 0d8ec0458..ea5a2994d 100644 --- a/test/strategies/test_strategies.py +++ b/test/strategies/test_strategies.py @@ -2,12 +2,16 @@ import numpy as np import pytest +from pathlib import Path import kernel_tuner -from kernel_tuner import util +from kernel_tuner.util import 
InvalidConfig from kernel_tuner.interface import strategy_map -cache_filename = os.path.dirname(os.path.realpath(__file__)) + "/../test_cache_file.json" +from ..context import skip_if_no_bayesopt_botorch, skip_if_no_bayesopt_gpytorch, skip_if_no_pyatf + + +cache_filename = Path(__file__).parent / "test_cache_file.json" @pytest.fixture def vector_add(): @@ -29,13 +33,28 @@ def vector_add(): args = [c, a, b, n] tune_params = dict() tune_params["block_size_x"] = [128 + 64 * i for i in range(15)] + tune_params["test_string"] = ["alg_1", "alg_2"] + tune_params["test_single"] = [15] + tune_params["test_bool"] = [True, False] + tune_params["test_mixed"] = ["test", 1, True, 2.45] return ["vector_add", kernel_string, size, args, tune_params] - -@pytest.mark.parametrize('strategy', strategy_map) +# skip some strategies if their dependencies are not installed +strategies = [] +for s in strategy_map.keys(): + if 'gpytorch' in s.lower() or 'botorch_alt' in s.lower(): + continue # TODO issue warning for uninstalled dependencies? 
+ if 'gpytorch' in s.lower(): + strategies.append(pytest.param(s, marks=skip_if_no_bayesopt_gpytorch)) + elif 'botorch' in s.lower(): + strategies.append(pytest.param(s, marks=skip_if_no_bayesopt_botorch)) + elif 'pyatf' in s.lower(): + strategies.append(pytest.param(s, marks=skip_if_no_pyatf)) + else: + strategies.append(s) +@pytest.mark.parametrize('strategy', strategies) def test_strategies(vector_add, strategy): - options = dict(popsize=5, neighbor='adjacent') print(f"testing {strategy}") @@ -48,8 +67,27 @@ def test_strategies(vector_add, strategy): if strategy != "brute_force": filter_options["max_fevals"] = 10 - results, _ = kernel_tuner.tune_kernel(*vector_add, strategy=strategy, strategy_options=filter_options, - verbose=False, cache=cache_filename, simulation_mode=True) + restrictions = [ + "test_string == 'alg_2'", + "test_bool == True", + "test_mixed == 2.45" + ] + + # pyATF can't handle non-number tune parameters, so we filter them out + cache_filename_local = cache_filename + if strategy == "pyatf_strategies": + tune_params = { + "block_size_x": [128 + 64 * i for i in range(15)] + } + restrictions = [] + cache_filename_local = cache_filename_local.parent.parent / "test_cache_file.json" + vector_add[-1] = tune_params + + # run the tuning in simulation mode + assert cache_filename_local.exists() + assert restrictions is not None + results, _ = kernel_tuner.tune_kernel(*vector_add, restrictions=restrictions, strategy=strategy, strategy_options=filter_options, + verbose=False, cache=cache_filename_local, simulation_mode=True) assert len(results) > 0 @@ -59,7 +97,7 @@ def test_strategies(vector_add, strategy): unique_results = {} for result in results: x_int = ",".join([str(v) for k, v in result.items() if k in tune_params]) - if not isinstance(result["time"], util.InvalidConfig): + if not isinstance(result["time"], InvalidConfig): unique_results[x_int] = result["time"] assert len(unique_results) <= filter_options["max_fevals"] @@ -75,6 +113,11 @@ def 
test_strategies(vector_add, strategy): 'framework_time': (float, int), 'timestamp': str } + if strategy != "pyatf_strategies": + expected_items['test_string'] = str + expected_items['test_single'] = int + expected_items['test_bool'] = bool + expected_items['test_mixed'] = float for res in results: assert len(res) == len(expected_items) for expected_key, expected_type in expected_items.items(): @@ -82,18 +125,13 @@ def test_strategies(vector_add, strategy): assert isinstance(res[expected_key], expected_type) # check if strategy respects user-specified starting point (x0) - x0 = [256] + x0 = [256, 'alg_2', 15, True, 2.45] filter_options["x0"] = x0 - if not strategy in ["brute_force", "random_sample", "bayes_opt"]: - results, _ = kernel_tuner.tune_kernel(*vector_add, strategy=strategy, strategy_options=filter_options, + if not strategy in ["brute_force", "random_sample", "bayes_opt", "pyatf_strategies"]: + results, _ = kernel_tuner.tune_kernel(*vector_add, restrictions=restrictions, strategy=strategy, strategy_options=filter_options, verbose=False, cache=cache_filename, simulation_mode=True) assert results[0]["block_size_x"] == x0[0] else: with pytest.raises(ValueError): - results, _ = kernel_tuner.tune_kernel(*vector_add, strategy=strategy, strategy_options=filter_options, + results, _ = kernel_tuner.tune_kernel(*vector_add, restrictions=restrictions, strategy=strategy, strategy_options=filter_options, verbose=False, cache=cache_filename, simulation_mode=True) - - - - - diff --git a/test/test_cache_file_T1_input.json b/test/test_cache_file_T1_input.json new file mode 100644 index 000000000..814a1145d --- /dev/null +++ b/test/test_cache_file_T1_input.json @@ -0,0 +1,82 @@ +{ + "General": { + "BenchmarkName": "vector_add", + "OutputFormat": "JSON" + }, + "ConfigurationSpace": { + "TuningParameters": [ + { + "Name": "block_size_x", + "Type": "int", + "Values": "[128+64*i for i in range(15)]", + "Default": 512 + } + ], + "Conditions": [] + }, + "KernelSpecification": { + 
"Language": "CUDA", + "CompilerOptions": [ + "-std=c++11" + ], + "BenchmarkName": "vector_add", + "KernelName": "vector_add", + "KernelFile": "vector_add.cu", + "GlobalSizeType": "CUDA", + "LocalSize": { + "X": "block_size_x", + "Y": "1", + "Z": "1" + }, + "GlobalSize": { + "X": "10000000 // block_size_x", + "Y": "1", + "Z": "1" + }, + "GridDivX": [ + "block_size_x" + ], + "GridDivY": [ + "block_size_y" + ], + "ProblemSize": [], + "SharedMemory": 0, + "Stream": null, + "Arguments": [ + { + "Name": "a", + "Type": "float", + "MemoryType": "Vector", + "AccessType": "ReadOnly", + "FillType": "Random", + "Size": 10000000, + "FillValue": 1.0 + }, + { + "Name": "b", + "Type": "float", + "MemoryType": "Vector", + "AccessType": "ReadOnly", + "FillType": "Random", + "Size": 10000000, + "FillValue": 1.0 + }, + { + "Name": "c", + "Type": "float", + "MemoryType": "Vector", + "AccessType": "WriteOnly", + "FillType": "Constant", + "Size": 10000000, + "FillValue": 0.0 + }, + { + "Name": "n", + "Type": "int32", + "MemoryType": "Scalar", + "AccessType": "ReadOnly", + "FillValue": 10000000 + } + ] + } +} \ No newline at end of file diff --git a/test/test_compiler_functions.py b/test/test_compiler_functions.py index 163f1d0da..85391d44a 100644 --- a/test/test_compiler_functions.py +++ b/test/test_compiler_functions.py @@ -11,9 +11,9 @@ from unittest.mock import Mock, patch import kernel_tuner -from kernel_tuner import util from kernel_tuner.backends.compiler import Argument, CompilerFunctions, get_array_module, is_cupy_array from kernel_tuner.core import KernelInstance, KernelSource +from kernel_tuner.util import delete_temp_file from .context import skip_if_no_cupy, skip_if_no_gcc, skip_if_no_gfortran, skip_if_no_openmp from .test_runners import env as cuda_env # noqa: F401 @@ -272,7 +272,7 @@ def test_complies_fortran_function_with_module(): assert np.isclose(result, 42.0) finally: - util.delete_temp_file("my_fancy_module.mod") + delete_temp_file("my_fancy_module.mod") 
@pytest.fixture diff --git a/test/test_custom_optimizer.py b/test/test_custom_optimizer.py new file mode 100644 index 000000000..6e40e5aaf --- /dev/null +++ b/test/test_custom_optimizer.py @@ -0,0 +1,174 @@ + +### The following was generating using the LLaMEA prompt and OpenAI o1 + +import numpy as np + +from kernel_tuner.strategies.wrapper import OptAlg + +class HybridDELocalRefinement(OptAlg): + """ + A two-phase differential evolution with local refinement, intended for BBOB-type + black box optimization problems in [-5,5]^dim. + + One-line idea: A two-phase hybrid DE with local refinement that balances global + exploration and local exploitation under a strict function evaluation budget. + """ + + def __init__(self): + super().__init__() + self.costfunc_kwargs = {"scaling": True, "snap": True} + # You can adjust these hyperparameters based on experimentation/tuning: + self.F = 0.8 # Differential weight + self.CR = 0.9 # Crossover probability + self.local_search_freq = 10 # Local refinement frequency in generations + + def __call__(self, func, searchspace): + """ + Optimize the black box function `func` in [-5,5]^dim, using + at most self.budget function evaluations. + + Returns: + best_params: np.ndarray representing the best parameters found + best_value: float representing the best objective value found + """ + self.dim = searchspace.num_params + self.population_size = round(min(min(50, 10 * self.dim), np.ceil(searchspace.size / 3))) # Caps for extremely large dim + + # 1. Initialize population + lower_bound, upper_bound = -5.0, 5.0 + pop = np.random.uniform(lower_bound, upper_bound, (self.population_size, self.dim)) + + # Evaluate initial population + evaluations = 0 + fitness = np.empty(self.population_size) + for i in range(self.population_size): + fitness[i] = func(pop[i]) + evaluations += 1 + + # Track best solution + best_idx = np.argmin(fitness) + best_params = pop[best_idx].copy() + best_value = fitness[best_idx] + + # 2. 
Main evolutionary loop + gen = 0 + while func.budget_spent_fraction < 1.0 and evaluations < searchspace.size: + gen += 1 + for i in range(self.population_size): + # DE mutation: pick three distinct indices + idxs = np.random.choice(self.population_size, 3, replace=False) + a, b, c = pop[idxs] + mutant = a + self.F * (b - c) + + # Crossover + trial = np.copy(pop[i]) + crossover_points = np.random.rand(self.dim) < self.CR + trial[crossover_points] = mutant[crossover_points] + + # Enforce bounds + trial = np.clip(trial, lower_bound, upper_bound) + + # Evaluate trial + trial_fitness = func(trial) + evaluations += 1 + if func.budget_spent_fraction > 1.0: + # If out of budget, wrap up + if trial_fitness < fitness[i]: + pop[i] = trial + fitness[i] = trial_fitness + # Update global best + if trial_fitness < best_value: + best_value = trial_fitness + best_params = trial.copy() + break + + # Selection + if trial_fitness < fitness[i]: + pop[i] = trial + fitness[i] = trial_fitness + # Update global best + if trial_fitness < best_value: + best_value = trial_fitness + best_params = trial.copy() + + # Periodically refine best solution with a small local neighborhood search + if gen % self.local_search_freq == 0 and func.budget_spent_fraction < 1.0: + best_params, best_value, evaluations = self._local_refinement( + func, best_params, best_value, evaluations, lower_bound, upper_bound + ) + + return best_params, best_value + + def _local_refinement(self, func, best_params, best_value, evaluations, lb, ub): + """ + Local refinement around the best solution found so far. + Uses a quick 'perturb-and-accept' approach in a shrinking neighborhood. 
+ """ + # Neighborhood size shrinks as the budget is consumed + step_size = 0.2 * (1.0 - func.budget_spent_fraction) + + for _ in range(5): # 5 refinements each time + if func.budget_spent_fraction >= 1.0: + break + candidate = best_params + np.random.uniform(-step_size, step_size, self.dim) + candidate = np.clip(candidate, lb, ub) + cand_value = func(candidate) + evaluations += 1 + if cand_value < best_value: + best_value = cand_value + best_params = candidate.copy() + + return best_params, best_value, evaluations + + +### Testing the Optimization Algorithm Wrapper in Kernel Tuner +from kernel_tuner import tune_kernel, tune_kernel_T1 +from kernel_tuner.strategies.wrapper import OptAlgWrapper +from pathlib import Path + +from .test_runners import env # noqa: F401 + +cache_filename = Path(__file__).parent.resolve() / "test_cache_file.json" + +def test_OptAlgWrapper(env): + kernel_name, kernel_string, size, args, tune_params = env + + # Instantiate LLaMAE optimization algorithm + optimizer = HybridDELocalRefinement() + + # Wrap the algorithm class in the OptAlgWrapper + # for use in Kernel Tuner + strategy = OptAlgWrapper(optimizer) + strategy_options = { 'max_fevals': 15 } + + # Call the tuner + res, _ = tune_kernel(kernel_name, kernel_string, size, args, tune_params, + strategy=strategy, strategy_options=strategy_options, cache=cache_filename, + simulation_mode=True, verbose=True) + assert len(res) == strategy_options['max_fevals'] + +def test_OptAlgWrapper_T1(env): + kernel_name, kernel_string, size, args, tune_params = env + + strategy = "HybridDELocalRefinement" + strategy_options = { + "max_fevals": 15, + "custom_search_method_path": Path(__file__).resolve(), + "constraint_aware": False, + } + iterations = 1 + + res, _ = tune_kernel_T1( + Path(__file__).parent.resolve() / "test_cache_file_T1_input.json", + cache_filename, + device="NVIDIA RTX A4000", + objective="time", + objective_higher_is_better=False, + simulation_mode=True, + output_T4=False, + 
iterations=iterations, + strategy=strategy, + strategy_options=strategy_options, + ) + + assert len(res) == strategy_options['max_fevals'] diff --git a/test/test_hyper.py b/test/test_hyper.py new file mode 100644 index 000000000..5963b3260 --- /dev/null +++ b/test/test_hyper.py @@ -0,0 +1,39 @@ +from kernel_tuner.hyper import tune_hyper_params + +from .context import skip_if_no_methodology +from .test_runners import env # noqa: F401 + + +@skip_if_no_methodology +def test_hyper(env): + + hyper_params = dict() + hyper_params["popsize"] = [5] + hyper_params["maxiter"] = [5, 10] + hyper_params["method"] = ["uniform"] + hyper_params["mutation_chance"] = [10] + + target_strategy = "genetic_algorithm" + + compiler_options = { + "gpus": ["A100", "MI250X"], + "override": { + "experimental_groups_defaults": { + "repeats": 1, + "samples": 1, + "minimum_fraction_of_budget_valid": 0.01, + }, + "statistics_settings": { + "cutoff_percentile": 0.80, + "cutoff_percentile_start": 0.01, + "cutoff_type": "time", + "objective_time_keys": [ + "all" + ] + } + } + } + + result, env = tune_hyper_params(target_strategy, hyper_params, restrictions=[], iterations=1, compiler_options=compiler_options, verbose=True, cache=None) + assert len(result) == 2 + assert 'best_config' in env diff --git a/test/test_integration.py b/test/test_integration.py index aafb437f1..637a07575 100644 --- a/test/test_integration.py +++ b/test/test_integration.py @@ -5,7 +5,7 @@ import pytest from kernel_tuner import integration -from kernel_tuner import util +from kernel_tuner.util import delete_temp_file from datetime import datetime, timezone @@ -71,7 +71,7 @@ def test_store_results(fake_results): assert my_gpu_100_data[0]["time"] < 100 finally: - util.delete_temp_file(filename) + delete_temp_file(filename) def test_setup_device_targets(fake_results): @@ -136,8 +136,8 @@ def test_setup_device_targets(fake_results): assert expected in output_str finally: - util.delete_temp_file(results_filename) - 
util.delete_temp_file(header_filename) + delete_temp_file(results_filename) + delete_temp_file(header_filename) def test_setup_device_targets_max(fake_results): @@ -174,5 +174,5 @@ def test_setup_device_targets_max(fake_results): assert expected in output_str finally: - util.delete_temp_file(results_filename) - util.delete_temp_file(header_filename) + delete_temp_file(results_filename) + delete_temp_file(header_filename) diff --git a/test/test_kernelbuilder.py b/test/test_kernelbuilder.py index c706e3953..9cd2d0185 100644 --- a/test/test_kernelbuilder.py +++ b/test/test_kernelbuilder.py @@ -3,8 +3,8 @@ import pytest from kernel_tuner import kernelbuilder -from kernel_tuner import util from kernel_tuner import integration +from kernel_tuner.util import delete_temp_file backends = ["cuda", "cupy"] @@ -59,4 +59,4 @@ def test_PythonKernel_tuned(test_kernel, backend): assert np.allclose(reference[0], a+b) finally: - util.delete_temp_file(test_results_file) + delete_temp_file(test_results_file) diff --git a/test/test_runners.py b/test/test_runners.py index acbb641e6..3a0a26e22 100644 --- a/test/test_runners.py +++ b/test/test_runners.py @@ -130,10 +130,16 @@ def test_simulation_runner(env): assert max_time - recorded_time_including_simulation < 10 -def test_diff_evo(env): +def test_constraint_aware_GA(env): + options = dict(method="uniform", + constraint_aware=True, + popsize=5, + maxiter=2, + mutation_chance=10, + max_fevals=10) result, _ = tune_kernel(*env, - strategy="diff_evo", - strategy_options=dict(popsize=5), + strategy="genetic_algorithm", + strategy_options=options, verbose=True, cache=cache_filename, simulation_mode=True) @@ -169,7 +175,7 @@ def test_time_keeping(env): answer=answer) max_time = (time.perf_counter() - start) * 1e3 # ms - assert len(result) >= 10 + assert len(result) >= 10, f"{len(result)=} < 10 for {kernel_name=} with {tune_params=}" timings = [ 'total_framework_time', 'total_strategy_time', 'total_compile_time', diff --git 
a/test/test_searchspace.py b/test/test_searchspace.py index 20f004051..f742a4b79 100644 --- a/test/test_searchspace.py +++ b/test/test_searchspace.py @@ -27,6 +27,10 @@ simple_searchspace = Searchspace(simple_tune_params, restrict, max_threads) simple_searchspace_bruteforce = Searchspace(simple_tune_params, restrict, max_threads, framework="bruteforce") +simple_tune_params_single = simple_tune_params.copy() +simple_tune_params_single["s"] = [True] +simple_searchspace_single = Searchspace(simple_tune_params_single, restrict, max_threads) + # 3.1 million combinations, of which 10600 pass the restrictions num_layers = 42 tune_params = dict() @@ -242,8 +246,16 @@ def test_neighbors_hamming(): (3, 4, 'string_1'), ] + # test the neighbors __test_neighbors(test_config, expected_neighbors, "Hamming") + # test the random neighbor function + neighbors = simple_searchspace.get_neighbors(test_config, "Hamming") + for i in range(10): + random_neighbor = simple_searchspace.get_random_neighbor(test_config, "Hamming") + assert random_neighbor in neighbors + assert random_neighbor != test_config + def test_neighbors_strictlyadjacent(): """Test whether the strictly adjacent neighbors are as expected.""" @@ -255,8 +267,16 @@ def test_neighbors_strictlyadjacent(): (1.5, 5.5, 'string_2'), ] + # test the neighbors __test_neighbors(test_config, expected_neighbors, "strictly-adjacent") + # test the random neighbor function + neighbors = simple_searchspace.get_neighbors(test_config, "strictly-adjacent") + for i in range(10): + random_neighbor = simple_searchspace.get_random_neighbor(test_config, "strictly-adjacent") + assert random_neighbor in neighbors + assert random_neighbor != test_config + def test_neighbors_adjacent(): """Test whether the adjacent neighbors are as expected.""" @@ -268,8 +288,34 @@ def test_neighbors_adjacent(): (1.5, 5.5, 'string_2'), ] + # test the neighbors __test_neighbors(test_config, expected_neighbors, "adjacent") + # test the random neighbor function + 
neighbors = simple_searchspace.get_neighbors(test_config, "adjacent") + for i in range(10): + random_neighbor = simple_searchspace.get_random_neighbor(test_config, "adjacent") + assert random_neighbor in neighbors + assert random_neighbor != test_config + +def test_neighbors_closest_param_indices(): + """Test whether the closest parameter indices neighbors are as expected.""" + test_config = tuple([1.5, 4, "string_1"]) + expected_neighbors = [ + (1.5, 5.5, 'string_1'), + (1.5, 4, 'string_2') + ] + + # test the neighbors + __test_neighbors(test_config, expected_neighbors, "closest-param-indices") + + # test the random neighbor function + neighbors = simple_searchspace.get_neighbors(test_config, "closest-param-indices") + for i in range(10): + random_neighbor = simple_searchspace.get_random_neighbor(test_config, "closest-param-indices") + assert random_neighbor in neighbors + assert random_neighbor != test_config + def test_neighbors_fictious(): """Test whether the neighbors are as expected for a fictious parameter configuration (i.e. 
not existing in the search space due to restrictions).""" @@ -414,6 +460,114 @@ def test_order_param_configs(): assert expected_param_config in ordered_neighbors assert len(ordered_neighbors) == len(expected_order) +def test_true_tunable_params(): + """Test whether the true tunable parameters are correctly identified.""" + # create a searchspace with mixed parameter types + mixed_tune_params = dict() + mixed_tune_params["int_param"] = [1, 2, 3] + mixed_tune_params["float_param"] = [3.0, 4.0, 5.0] + mixed_restrict = ["int_param >= 3"] + + # create the searchspace object + searchspace = Searchspace(mixed_tune_params, mixed_restrict, max_threads) + + # check the size + assert searchspace.size == 3 + + # check that the true tunable parameters are correctly identified + true_tunable_params = searchspace.get_true_tunable_params() + assert len(true_tunable_params) == 1 + assert "float_param" in true_tunable_params + assert true_tunable_params["float_param"] == mixed_tune_params["float_param"] + + +def test_mixed_param_types(): + """Test whether the searchspace can handle mixed parameter types.""" + # create a searchspace with mixed parameter types + mixed_tune_params = dict() + mixed_tune_params["int_param"] = [1, 2, 3] + mixed_tune_params["float_param"] = [1.0, 2.0, 3.0, -4.4] + mixed_tune_params["str_param"] = ["Alpha", "Bravo", "Charlie"] + mixed_tune_params["bool_param"] = [True, False] + mixed_restrict = ["int_param + float_param > 3", "bool_param == False"] + + # create the searchspace object + searchspace = Searchspace(mixed_tune_params, mixed_restrict, max_threads) + + # check the size + assert searchspace.size == 18 == len(searchspace.list) == len(searchspace.get_list_dict().keys()) + + # check whether param indices are correctly identified + assert searchspace.get_param_indices(tuple([1, 1.0, "Alpha", True])) == (0, 0, 0, 0) + assert searchspace.get_param_indices(tuple([2, 2.0, "Bravo", False])) == (1, 1, 1, 1) + + # check whether the mapping of params to param 
indices and back works + for param_config in searchspace.list: + param_indices = searchspace.get_param_indices(param_config) + assert searchspace.get_param_config_from_param_indices(param_indices) == param_config + + # check the parameter types + assert all(v1 == v2 for v1, v2 in zip(searchspace.tune_param_is_numeric_mask, [True, True, False, False])) + + # check whether numeric params work as expected + for param_config_numeric, param_config in zip(searchspace.get_list_numpy_numeric(), searchspace.list): + assert searchspace.get_param_config_from_numeric(param_config_numeric) == param_config + + # check whether the true index bounds are as expected + assert all(v1 == v2 for v1, v2 in zip(searchspace.get_param_indices_lower_bounds(), (0, 0, 0, 1))) + assert all(v1 == v2 for v1, v2 in zip(searchspace.get_param_indices_upper_bounds(), (2, 2, 2, 1))) + +def test_get_distributed_random_sample(): + """Test whether the distributed random sample indices are as expected.""" + # create a searchspace with mixed parameter types + mixed_tune_params = dict() + mixed_tune_params["int_param"] = [1, 2, 3] + mixed_tune_params["float_param"] = [1.0, 2.0, 3.0] + mixed_tune_params["str_param"] = ["Alpha", "Bravo", "Charlie"] + mixed_tune_params["bool_param"] = [True, False] + mixed_restrict = ["int_param + float_param > 2", "bool_param == False"] + + # create the searchspace object + searchspace = Searchspace(mixed_tune_params, mixed_restrict, max_threads) + + # check the size + assert searchspace.size == 24 + + # get the distributed random sample indices + num_samples = 10 + distributed_random_sample_indices = searchspace.get_distributed_random_sample_indices(num_samples=num_samples, sampling_factor=2) + + # check that the indices are unique and within bounds + assert len(distributed_random_sample_indices) == num_samples + assert len(set(distributed_random_sample_indices)) == num_samples + for index in distributed_random_sample_indices: + assert 0 <= index < searchspace.size + +def 
test_get_LHS_sample_indices(): + """Test whether the distributed random sample indices are as expected.""" + # create a searchspace with mixed parameter types + mixed_tune_params = dict() + mixed_tune_params["int_param"] = [1, 2, 3] + mixed_tune_params["float_param"] = [1.0, 2.0, 3.0] + mixed_tune_params["str_param"] = ["Alpha", "Bravo", "Charlie"] + mixed_tune_params["bool_param"] = [True, False] + mixed_restrict = ["int_param + float_param > 2", "bool_param == False"] + + # create the searchspace object + searchspace = Searchspace(mixed_tune_params, mixed_restrict, max_threads) + + # check the size + assert searchspace.size == 24 + + # get the distributed random sample indices + num_samples = 10 + distributed_random_sample_indices = searchspace.get_LHS_sample_indices(num_samples=num_samples) + + # check that the indices are unique and within bounds + assert len(distributed_random_sample_indices) == num_samples + assert len(set(distributed_random_sample_indices)) == num_samples + for index in distributed_random_sample_indices: + assert 0 <= index < searchspace.size def test_small_searchspace(): """Test a small real-world searchspace and the usage of the `max_threads` parameter.""" diff --git a/test/vector_add.cu b/test/vector_add.cu new file mode 100644 index 000000000..e79c16308 --- /dev/null +++ b/test/vector_add.cu @@ -0,0 +1,6 @@ +__global__ void vector_add(float *c, float *a, float *b, int n) { + int i = blockIdx.x * block_size_x + threadIdx.x; + if (i