1+ {
2+ "device_name" : "NVIDIA RTX A4000" ,
3+ "kernel_name" : "fp32_kernel" ,
4+ "problem_size" : [
5+ 48 ,
6+ 64
7+ ],
8+ "tune_params_keys" : [
9+ "block_size_x" ,
10+ "nr_outer" ,
11+ "nr_inner" ,
12+ "nvml_gr_clock"
13+ ],
14+ "tune_params" : {
15+ "block_size_x" : [
16+ 1024
17+ ],
18+ "nr_outer" : [
19+ 64
20+ ],
21+ "nr_inner" : [
22+ 1024
23+ ],
24+ "nvml_gr_clock" : [
25+ 2100 ,
26+ 1905 ,
27+ 1710 ,
28+ 1530 ,
29+ 1335 ,
30+ 1155 ,
31+ 960 ,
32+ 780 ,
33+ 585 ,
34+ 405
35+ ]
36+ },
37+ "cache" : {
38+ "1024,64,1024,2100" : {"block_size_x" : 1024 , "nr_outer" : 64 , "nr_inner" : 1024 , "nvml_gr_clock" : 2100 , "core_freq" : 1739.7175427234456 , "time" : 77.53778305053712 , "times" : [75.01824188232422 , 78.39437103271484 , 77.64275360107422 , 77.55980682373047 , 77.51065826416016 , 77.77279663085938 , 77.90386962890625 , 77.73798370361328 , 78.08905792236328 , 77.748291015625 ], "pwr_core_freq" : 1722.6449009537785 , "nvml_power" : 139.161 , "compile_time" : 3950.955282896757 , "verification_time" : 0 , "benchmark_time" : 1796.9796061515808 , "f" : 1739.7175427234456 , "strategy_time" : 0 , "framework_time" : 2.5989077985286713 },
39+ "1024,64,1024,1905" : {"block_size_x" : 1024 , "nr_outer" : 64 , "nr_inner" : 1024 , "nvml_gr_clock" : 1905 , "core_freq" : 1752.1604878240541 , "time" : 76.95706253051758 , "times" : [74.33216094970703 , 77.56800079345703 , 77.43180847167969 , 77.14498901367188 , 77.5198745727539 , 76.99967956542969 , 76.99763488769531 , 77.12767791748047 , 77.14508819580078 , 77.3037109375 ], "pwr_core_freq" : 1731.5070093457944 , "nvml_power" : 138.871 , "compile_time" : 3948.057930916548 , "verification_time" : 0 , "benchmark_time" : 2191.8914653360844 , "f" : 1752.1604878240541 , "strategy_time" : 0 , "framework_time" : 1.8665380775928497 },
40+ "1024,64,1024,1710" : {"block_size_x" : 1024 , "nr_outer" : 64 , "nr_inner" : 1024 , "nvml_gr_clock" : 1710 , "core_freq" : 1710.0 , "time" : 78.82117233276367 , "times" : [78.94528198242188 , 78.84595489501953 , 78.80601501464844 , 78.80397033691406 , 78.79987335205078 , 78.82546997070312 , 78.79373168945312 , 78.79065704345703 , 78.80089569091797 , 78.79987335205078 ], "pwr_core_freq" : 1710.0 , "nvml_power" : 133.824 , "compile_time" : 3941.864926367998 , "verification_time" : 0 , "benchmark_time" : 2163.301534950733 , "f" : 1710.0 , "strategy_time" : 0 , "framework_time" : 1.9937194883823395 },
41+ "1024,64,1024,1530" : {"block_size_x" : 1024 , "nr_outer" : 64 , "nr_inner" : 1024 , "nvml_gr_clock" : 1530 , "core_freq" : 1530.0 , "time" : 88.1852409362793 , "times" : [88.99378967285156 , 88.22271728515625 , 88.0711669921875 , 88.07730865478516 , 88.07014465332031 , 88.06400299072266 , 88.07936096191406 , 88.1244125366211 , 88.0732192993164 , 88.07628631591797 ], "pwr_core_freq" : 1530.0 , "nvml_power" : 103.031 , "compile_time" : 3972.328145056963 , "verification_time" : 0 , "benchmark_time" : 2333.6658142507076 , "f" : 1530.0 , "strategy_time" : 0 , "framework_time" : 1.7150826752185822 },
42+ "1024,64,1024,1335" : {"block_size_x" : 1024 , "nr_outer" : 64 , "nr_inner" : 1024 , "nvml_gr_clock" : 1335 , "core_freq" : 1335.0 , "time" : 100.94090194702149 , "times" : [101.01964569091797 , 100.91929626464844 , 100.92953491210938 , 100.92031860351562 , 100.93772888183594 , 100.93260955810547 , 100.9438705444336 , 100.93465423583984 , 100.9438705444336 , 100.927490234375 ], "pwr_core_freq" : 1335.0 , "nvml_power" : 79.923 , "compile_time" : 3961.2973630428314 , "verification_time" : 0 , "benchmark_time" : 2362.926233559847 , "f" : 1335.0 , "strategy_time" : 0 , "framework_time" : 2.0712725818157196 },
43+ "1024,64,1024,1155" : {"block_size_x" : 1024 , "nr_outer" : 64 , "nr_inner" : 1024 , "nvml_gr_clock" : 1155 , "core_freq" : 1155.0 , "time" : 116.66185607910157 , "times" : [116.74111938476562 , 116.66124725341797 , 116.66226959228516 , 116.63359832763672 , 116.66124725341797 , 116.65299224853516 , 116.63359832763672 , 116.66329956054688 , 116.66944122314453 , 116.6397476196289 ], "pwr_core_freq" : 1155.0 , "nvml_power" : 72.241 , "compile_time" : 3956.4162008464336 , "verification_time" : 0 , "benchmark_time" : 2606.1498671770096 , "f" : 1155.0 , "strategy_time" : 0 , "framework_time" : 1.7136447131633759 },
44+ "1024,64,1024,960" : {"block_size_x" : 1024 , "nr_outer" : 64 , "nr_inner" : 1024 , "nvml_gr_clock" : 960 , "core_freq" : 960.0 , "time" : 140.37103271484375 , "times" : [140.4620819091797 , 140.3555145263672 , 140.36480712890625 , 140.35353088378906 , 140.3934783935547 , 140.39859008789062 , 140.33920288085938 , 140.37298583984375 , 140.31666564941406 , 140.3534698486328 ], "pwr_core_freq" : 960.0 , "nvml_power" : 65.004 , "compile_time" : 3970.390599220991 , "verification_time" : 0 , "benchmark_time" : 2920.069545507431 , "f" : 960.0 , "strategy_time" : 0 , "framework_time" : 2.164263278245926 },
45+ "1024,64,1024,780" : {"block_size_x" : 1024 , "nr_outer" : 64 , "nr_inner" : 1024 , "nvml_gr_clock" : 780 , "core_freq" : 780.0 , "time" : 172.72442779541015 , "times" : [172.832763671875 , 172.7651824951172 , 172.6945343017578 , 172.71192932128906 , 172.7139892578125 , 172.71192932128906 , 172.71090698242188 , 172.7262725830078 , 172.70375061035156 , 172.6730194091797 ], "pwr_core_freq" : 780.0 , "nvml_power" : 58.972 , "compile_time" : 3970.3225307166576 , "verification_time" : 0 , "benchmark_time" : 3119.3020306527615 , "f" : 780.0 , "strategy_time" : 0 , "framework_time" : 1.7446689307689667 },
46+ "1024,64,1024,585" : {"block_size_x" : 1024 , "nr_outer" : 64 , "nr_inner" : 1024 , "nvml_gr_clock" : 585 , "core_freq" : 585.0 , "time" : 230.3109100341797 , "times" : [230.466552734375 , 230.2566375732422 , 230.37440490722656 , 230.35186767578125 , 230.26585388183594 , 230.2678985595703 , 230.2228546142578 , 230.27711486816406 , 230.35903930664062 , 230.26687622070312 ], "pwr_core_freq" : 585.0 , "nvml_power" : 52.095 , "compile_time" : 3952.59528234601 , "verification_time" : 0 , "benchmark_time" : 3827.805496752262 , "f" : 585.0 , "strategy_time" : 0 , "framework_time" : 1.6073323786258698 },
47+ "1024,64,1024,405" : {"block_size_x" : 1024 , "nr_outer" : 64 , "nr_inner" : 1024 , "nvml_gr_clock" : 405 , "core_freq" : 405.0 , "time" : 332.58628845214844 , "times" : [332.5245361328125 , 332.55218505859375 , 332.5777893066406 , 332.6330871582031 , 332.60748291015625 , 332.5696105957031 , 332.5962219238281 , 332.6740417480469 , 332.5317077636719 , 332.5962219238281 ], "pwr_core_freq" : 405.0 , "nvml_power" : 43.928 , "compile_time" : 3988.419521600008 , "verification_time" : 0 , "benchmark_time" : 5038.08306902647 , "f" : 405.0 , "strategy_time" : 0 , "framework_time" : 1.7108693718910217 }}
48+ }
0 commit comments