diff --git a/go.sum b/go.sum index 5dfb0e5..7ab5a8f 100644 --- a/go.sum +++ b/go.sum @@ -10,6 +10,8 @@ github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+ github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cfsmp3/gonvml v0.0.0-20190828220739-9ebdce4bb989 h1:yERlgatNHz1DICCSbpndUoBwtQX8haKGQaW0X3rbyT4= github.com/cfsmp3/gonvml v0.0.0-20190828220739-9ebdce4bb989/go.mod h1:mHePyfjLFeCKiqdBbfcp6EsZ8DuiqmyErsxO9r/H9FQ= +github.com/cfsmp3/gonvml v0.0.6 h1:NA4Ac44F8SMHLhDh+wnjmut1wG3sep+kCQSdwJ+msYo= +github.com/cfsmp3/gonvml v0.0.6/go.mod h1:mHePyfjLFeCKiqdBbfcp6EsZ8DuiqmyErsxO9r/H9FQ= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= diff --git a/main.go b/main.go index 64bf9df..681371a 100644 --- a/main.go +++ b/main.go @@ -23,6 +23,9 @@ const ( var ( addr = flag.String("web.listen-address", ":9445", "Address to listen on for web interface and telemetry.") enableFanSpeed = flag.Bool("enable-fanspeed", true, "Enable fanspeed metric") + enablePowerLimits = flag.Bool("enable-powerlimits", true, "Enable power limit metrics") + enableAveragePowerUsage = flag.Bool("enable-averagepowerusage", true, "Enable average power usage metric") + labels = []string{"minor_number", "uuid", "name"} @@ -565,11 +568,13 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) { c.powerUsage.WithLabelValues(minor, uuid, name).Set(float64(powerUsage/1000)) } - avgPowerUsage, err := dev.AveragePowerUsage(averageDuration) - if err != nil { - log.Printf("AveragePowerUsage() error: %v", err) - } else { - c.avgPowerUsage.WithLabelValues(minor, uuid, name).Set(float64(avgPowerUsage/1000)) + if *enableAveragePowerUsage { + avgPowerUsage, err := dev.AveragePowerUsage(averageDuration) + if err != nil { + log.Printf("AveragePowerUsage() error: %v", err) + } else { + c.avgPowerUsage.WithLabelValues(minor, uuid, name).Set(float64(avgPowerUsage/1000)) + } } energyConsumption, err := dev.TotalEnergyConsumption() @@ -579,26 +584,28 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) { c.energyConsumption.WithLabelValues(minor, uuid, name).Set(float64(energyConsumption/1000)) } - powerLimitConstraintsMin, powerLimitConstraintsMax, err := dev.PowerLimitConstraints() - if err != nil { - log.Printf("PowerLimitConstraints() error: %v", err) - } else { - c.powerLimitConstraintsMin.WithLabelValues(minor, uuid, name).Set(float64(powerLimitConstraintsMin/1000)) - c.powerLimitConstraintsMax.WithLabelValues(minor, uuid, name).Set(float64(powerLimitConstraintsMax/1000)) - } + if *enablePowerLimits { + powerLimitConstraintsMin, powerLimitConstraintsMax, err := dev.PowerLimitConstraints() + if err != nil { + log.Printf("PowerLimitConstraints() error: %v", err) + } else { + c.powerLimitConstraintsMin.WithLabelValues(minor, uuid, name).Set(float64(powerLimitConstraintsMin/1000)) + c.powerLimitConstraintsMax.WithLabelValues(minor, uuid, name).Set(float64(powerLimitConstraintsMax/1000)) + } - powerLimitManagement, powerLimitEnforced, err := dev.PowerLimits() - if err != nil { - log.Printf("PowerLimits() error: %v", err) - } else { - c.powerLimitManagement.WithLabelValues(minor, uuid, name).Set(float64(powerLimitManagement/1000)) - c.powerLimitEnforced.WithLabelValues(minor, uuid, name).Set(float64(powerLimitEnforced/1000)) - } - powerManagementDefaultLimit, err := dev.PowerManagementDefaultLimit() - if err != nil { - log.Printf("PowerManagementDefaultLimit() error: %v", err) - } else { - c.powerManagementDefaultLimit.WithLabelValues(minor, uuid, name).Set(float64(powerManagementDefaultLimit/1000)) + powerLimitManagement, powerLimitEnforced, err := dev.PowerLimits() + if err != nil { + log.Printf("PowerLimits() error: %v", err) + } else { + c.powerLimitManagement.WithLabelValues(minor, uuid, name).Set(float64(powerLimitManagement/1000)) + c.powerLimitEnforced.WithLabelValues(minor, uuid, name).Set(float64(powerLimitEnforced/1000)) + } + powerManagementDefaultLimit, err := dev.PowerManagementDefaultLimit() + if err != nil { + log.Printf("PowerManagementDefaultLimit() error: %v", err) + } else { + c.powerManagementDefaultLimit.WithLabelValues(minor, uuid, name).Set(float64(powerManagementDefaultLimit/1000)) + } } temperature, err := dev.Temperature() diff --git a/nvidia_gpu_prometheus_exporter b/nvidia_gpu_prometheus_exporter index dd71309..26cfca9 100755 Binary files a/nvidia_gpu_prometheus_exporter and b/nvidia_gpu_prometheus_exporter differ