|
41 | 41 | } |
42 | 42 | """ |
43 | 43 |
|
44 | | -def get_frequency_power_relation_fp32(device, n_samples=10, nvidia_smi_fallback=None, use_locked_clocks=False, cache=None): |
| 44 | +def get_frequency_power_relation_fp32(device, n_samples=10, nvidia_smi_fallback=None, use_locked_clocks=False, cache=None, simulation_mode=None): |
45 | 45 | """ Use NVML and PyCUDA with a synthetic kernel to obtain samples of frequency-power pairs """ |
46 | 46 |
|
47 | 47 | # get some numbers about the device |
@@ -85,7 +85,7 @@ def get_frequency_power_relation_fp32(device, n_samples=10, nvidia_smi_fallback= |
85 | 85 |
|
86 | 86 | results, _ = tune_kernel("fp32_kernel", fp32_kernel_string, problem_size=(multiprocessor_count, 64), |
87 | 87 | arguments=arguments, tune_params=tune_params, observers=[nvmlobserver], |
88 | | - verbose=False, quiet=True, metrics=metrics, iterations=10, |
| 88 | + verbose=False, quiet=True, metrics=metrics, iterations=10, simulation_mode=simulation_mode, |
89 | 89 | grid_div_x=[], grid_div_y=[], cache=cache or f"synthetic_fp32_cache_{device_name}.json") |
90 | 90 |
|
91 | 91 | freqs = np.array([res["core_freq"] for res in results]) |
@@ -147,7 +147,7 @@ def fit_power_frequency_model(freqs, nvml_power): |
147 | 147 | return clock_threshold + clock_min, fit_parameters, scale_parameters |
148 | 148 |
|
149 | 149 |
|
150 | | -def create_power_frequency_model(device=0, n_samples=10, verbose=False, nvidia_smi_fallback=None, use_locked_clocks=False, cache=None): |
| 150 | +def create_power_frequency_model(device=0, n_samples=10, verbose=False, nvidia_smi_fallback=None, use_locked_clocks=False, cache=None, simulation_mode=None): |
151 | 151 | """ Calculate the most energy-efficient clock frequency of device |
152 | 152 |
|
153 | 153 | This function uses a performance model to fit the power-frequency curve |
@@ -181,7 +181,7 @@ def create_power_frequency_model(device=0, n_samples=10, verbose=False, nvidia_s |
181 | 181 | :rtype: float |
182 | 182 |
|
183 | 183 | """ |
184 | | - freqs, nvml_power = get_frequency_power_relation_fp32(device, n_samples, nvidia_smi_fallback, use_locked_clocks, cache=cache) |
| 184 | + freqs, nvml_power = get_frequency_power_relation_fp32(device, n_samples, nvidia_smi_fallback, use_locked_clocks, cache=cache, simulation_mode=simulation_mode) |
185 | 185 |
|
186 | 186 | if verbose: |
187 | 187 | print("Clock frequencies:", freqs.tolist()) |
|
0 commit comments