@@ -52,13 +52,13 @@ def get_frequency_power_relation_fp32(device, n_samples=10, nvidia_smi_fallback=
5252 drv .init ()
5353 dev = drv .Device (device )
5454 device_name = dev .name ().replace (' ' , '_' )
55- multiprocessor_count = int ( dev .get_attribute (
56- drv .device_attribute .MULTIPROCESSOR_COUNT ))
57- max_block_dim_x = int ( dev .get_attribute (drv .device_attribute .MAX_BLOCK_DIM_X ) )
55+ multiprocessor_count = dev .get_attribute (
56+ drv .device_attribute .MULTIPROCESSOR_COUNT )
57+ max_block_dim_x = dev .get_attribute (drv .device_attribute .MAX_BLOCK_DIM_X )
5858
5959 # kernel arguments
6060 data_size = (multiprocessor_count , max_block_dim_x )
61- data = np .random .random (np .prod (data_size )).astype (float )
61+ data = np .random .random (np .prod (data_size )).astype (np . float32 )
6262 arguments = [data ]
6363
6464 # setup clocks
@@ -74,7 +74,7 @@ def get_frequency_power_relation_fp32(device, n_samples=10, nvidia_smi_fallback=
7474 tune_params ["nr_inner" ] = [1024 ]
7575 tune_params .update (nvml_gr_clocks )
7676
77- tune_params ["nvml_gr_clock" ] = [int (c ) for c in tune_params ["nvml_gr_clock" ]]
77+ # tune_params["nvml_gr_clock"] = [int(c) for c in tune_params["nvml_gr_clock"]]
7878
7979 # metrics
8080 metrics = OrderedDict ()
@@ -210,6 +210,5 @@ def get_frequency_range_around_ridge(ridge_frequency, all_frequencies, freq_rang
210210
211211 if verbose :
212212 print (f"Suggested range of frequencies to auto-tune: { frequency_selection } MHz" )
213- print (f"Search space reduction: { np .round (100 - len (frequency_selection ) / len (all_frequencies ) * 100 , 1 )} %%" )
214213
215214 return frequency_selection
0 commit comments