|
| 1 | +#!/usr/bin/env python |
| 2 | +""" |
| 3 | +This example demonstrates how to use the power-frequency model presented in |
| 4 | +
|
| 5 | + * Going green: optimizing GPUs for energy efficiency through model-steered auto-tuning |
| 6 | + R. Schoonhoven, B. Veenboer, B. van Werkhoven, K. J. Batenburg |
| 7 | + International Workshop on Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems (PMBS) at Supercomputing (SC22) 2022 |
| 8 | +
|
| 9 | +to reduce the number of frequencies for GPU energy tuning. |
| 10 | +
|
| 11 | +In particular, this example creates a plot with the modeled power consumption vs |
| 12 | +frequency curve, highlighting the ridge frequency and the frequency range |
| 13 | +selected by the user. |
| 14 | +
|
| 15 | +This example requires CUDA and NVML as well as PyCUDA and a CUDA-capable |
| 16 | +GPU with the ability (and permissions) to set application clocks. GPUs |
| 17 | +that support locked clocks but not application clocks may pass the |
| 18 | +-l option (e.g. "-l True") to use locked clocks instead. |
| 19 | +
|
| 20 | +""" |
| 21 | +import argparse |
| 22 | +from collections import OrderedDict |
| 23 | +import numpy as np |
| 24 | +import math |
| 25 | +import matplotlib.pyplot as plt |
| 26 | +from scipy import optimize |
| 27 | +import time |
| 28 | + |
| 29 | +try: |
| 30 | + from pycuda import driver as drv |
| 31 | +except ImportError as e: |
| 32 | + drv = None |
| 33 | + raise e |
| 34 | + |
| 35 | +from kernel_tuner.energy import energy |
| 36 | +from kernel_tuner.nvml import get_nvml_gr_clocks |
| 37 | + |
def get_default_parser():
    """Build the command-line parser for this example.

    Numeric options are converted to ``int`` by the parser, and ``-l`` is
    converted to a real ``bool``. Every option may be given without a value,
    in which case it falls back to its default (or, for ``-l``, to ``True``).

    Returns:
        argparse.ArgumentParser: parser exposing ``-d``, ``-s``, ``-r``,
        ``-n``, ``-l`` and ``-nsf``.
    """

    def _parse_bool(text):
        """Interpret a command-line string as a boolean."""
        lowered = text.strip().lower()
        if lowered in ("1", "true", "t", "yes", "y", "on"):
            return True
        if lowered in ("", "0", "false", "f", "no", "n", "off"):
            return False
        raise argparse.ArgumentTypeError(
            f"expected a boolean value, got {text!r}")

    parser = argparse.ArgumentParser(
        description='Find energy efficient frequencies')

    # Without type=int, values supplied on the command line would reach the
    # caller as strings; const mirrors the default so that a bare flag does
    # not silently become None.
    parser.add_argument("-d", dest="device", nargs="?", type=int,
                        const=0, default=0, help="GPU ID to use")
    parser.add_argument("-s", dest="samples", nargs="?", type=int,
                        const=10, default=10,
                        help="Number of frequency samples")
    parser.add_argument("-r", dest="range", nargs="?", type=int,
                        const=10, default=10,
                        help="Frequency spread (10%% of 'optimum')")
    parser.add_argument("-n", dest="number", nargs="?", type=int,
                        const=10, default=10,
                        help="Maximum number of suggested frequencies")

    # A bare "-l" enables locked clocks; "-l True" / "-l False" are still
    # accepted and are parsed as booleans rather than kept as truthy strings.
    parser.add_argument("-l", dest="locked_clocks", nargs="?",
                        type=_parse_bool, const=True, default=False,
                        help="Whether to use locked clocks over application clocks")
    parser.add_argument("-nsf", dest="nvidia_smi_fallback", nargs="?", default=None,
                        help="Path to nvidia-smi as fallback when missing NVML permissions")

    return parser
| 56 | + |
| 57 | + |
if __name__ == "__main__":
    # Fit the power-frequency model on the selected GPU, derive a reduced set
    # of candidate frequencies around the ridge frequency, and plot the result.
    parser = get_default_parser()
    args = parser.parse_args()

    # Values given on the command line may arrive as strings; normalise them
    # once here so every consumer below sees integers.
    device = int(args.device)
    n_samples = int(args.samples)
    freq_range = int(args.range)
    max_frequencies = int(args.number)

    ridge_frequency, freqs, nvml_power, fitted_params, scaling = energy.create_power_frequency_model(
        device=device,
        n_samples=n_samples,
        verbose=True,
        nvidia_smi_fallback=args.nvidia_smi_fallback,
        use_locked_clocks=args.locked_clocks)

    all_frequencies = np.array(get_nvml_gr_clocks(device, quiet=True)['nvml_gr_clock'])

    frequency_selection = energy.get_frequency_range_around_ridge(
        ridge_frequency, all_frequencies, freq_range, max_frequencies, verbose=True)
    print(f"Search space reduction: {np.round(100 - len(frequency_selection) / len(all_frequencies) * 100, 1)} %")

    # Evaluate the model on a dense grid. The model is evaluated on
    # frequencies shifted by scaling[0] and its output is multiplied by
    # scaling[1] to undo the scaling.
    xs = np.linspace(all_frequencies[0], all_frequencies[-1], 100)
    modelled_power = energy.estimated_power(xs - scaling[0], *fitted_params) * scaling[1]

    # Add point for ridge frequency
    P_ridge = energy.estimated_power([ridge_frequency - scaling[0]], *fitted_params) * scaling[1]

    # Add the frequency range (+/- freq_range percent around the ridge)
    min_freq = 1e-2 * (100 - freq_range) * ridge_frequency
    max_freq = 1e-2 * (100 + freq_range) * ridge_frequency

    # plot measurements with model; seaborn styling is optional
    try:
        import seaborn as sns
    except ImportError:
        sns = None

    if sns is not None:
        sns.set_theme(style="darkgrid")
        sns.set_context("paper", rc={"font.size": 10,
                                     "axes.titlesize": 9, "axes.labelsize": 12})
    fig, ax = plt.subplots()
    if sns is None:
        plt.grid()

    plt.scatter(x=freqs, y=nvml_power, label='NVML measurements')
    plt.scatter(x=ridge_frequency, y=P_ridge, color='g',
                label='Ridge frequency (MHz)')
    plt.plot(xs, modelled_power, label='Modelled power consumption')
    ax.axvspan(min_freq, max_freq, alpha=0.15, color='green',
               label='Recommended frequency range')
    plt.title('GPU modelled power consumption', size=18)
    plt.xlabel('Core frequency (MHz)')
    plt.ylabel('Power consumption (W)')
    plt.legend()

    # Save before showing: once the interactive window is closed the figure
    # is gone, and a later savefig would write an empty page.
    fig.savefig("GPU_power_consumption_model.pdf")
    plt.show()
0 commit comments