Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,13 @@ ibswinfo | Collect data on unmanaged switches via ibswinfo (BETA) | Disabled
hca | Collect HCA port counters | Disabled

If you have a node name map file, such as the one typically used with Subnet Managers, you can pass it to the exporter with the `--ibnetdiscover.node-name-map` flag. The exporter will then use the friendly names from that file for switches.
Even without this flag, the exporter still adds the `switch` label, populated with whatever name (a hostname or a GUID-style placeholder) ibnetdiscover returns.


If you wish to run the exporter as a user other than root and do not want to use sudo, you must make the UMAD device readable and writable by all users, with something like the following:

```
$ cat /etc/udev/rules.d/99-ib.rules
KERNEL=="umad*", NAME="infiniband/%k" MODE="0666"
```

Expand Down
44 changes: 22 additions & 22 deletions collectors/ibswinfo.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,29 +95,29 @@ func NewIbswinfoCollector(devices *[]InfinibandDevice, runonce bool, logger log.
logger: log.With(logger, "collector", collector),
collector: collector,
Duration: prometheus.NewDesc(prometheus.BuildFQName(namespace, "switch", "collect_duration_seconds"),
"Duration of collection", []string{"guid", "collector"}, nil),
"Duration of collection", []string{"guid", "switch", "collector"}, nil),
Error: prometheus.NewDesc(prometheus.BuildFQName(namespace, "switch", "collect_error"),
"Indicates if collect error", []string{"guid", "collector"}, nil),
"Indicates if collect error", []string{"guid", "switch", "collector"}, nil),
Timeout: prometheus.NewDesc(prometheus.BuildFQName(namespace, "switch", "collect_timeout"),
"Indicates if collect timeout", []string{"guid", "collector"}, nil),
"Indicates if collect timeout", []string{"guid", "switch", "collector"}, nil),
HardwareInfo: prometheus.NewDesc(prometheus.BuildFQName(namespace, "switch", "hardware_info"),
"Infiniband switch hardware info", []string{"guid", "firmware_version", "psid", "part_number", "serial_number", "switch"}, nil),
Uptime: prometheus.NewDesc(prometheus.BuildFQName(namespace, "switch", "uptime_seconds"),
"Infiniband switch uptime in seconds", []string{"guid"}, nil),
"Infiniband switch uptime in seconds", []string{"guid", "switch"}, nil),
PowerSupplyStatus: prometheus.NewDesc(prometheus.BuildFQName(namespace, "switch", "power_supply_status_info"),
"Infiniband switch power supply status", []string{"guid", "psu", "status"}, nil),
"Infiniband switch power supply status", []string{"guid", "switch", "psu", "status"}, nil),
PowerSupplyDCPower: prometheus.NewDesc(prometheus.BuildFQName(namespace, "switch", "power_supply_dc_power_status_info"),
"Infiniband switch power supply DC power status", []string{"guid", "psu", "status"}, nil),
"Infiniband switch power supply DC power status", []string{"guid", "switch", "psu", "status"}, nil),
PowerSupplyFanStatus: prometheus.NewDesc(prometheus.BuildFQName(namespace, "switch", "power_supply_fan_status_info"),
"Infiniband switch power supply fan status", []string{"guid", "psu", "status"}, nil),
"Infiniband switch power supply fan status", []string{"guid", "switch", "psu", "status"}, nil),
PowerSupplyWatts: prometheus.NewDesc(prometheus.BuildFQName(namespace, "switch", "power_supply_watts"),
"Infiniband switch power supply watts", []string{"guid", "psu"}, nil),
"Infiniband switch power supply watts", []string{"guid", "switch", "psu"}, nil),
Temp: prometheus.NewDesc(prometheus.BuildFQName(namespace, "switch", "temperature_celsius"),
"Infiniband switch temperature celsius", []string{"guid"}, nil),
"Infiniband switch temperature celsius", []string{"guid", "switch"}, nil),
FanStatus: prometheus.NewDesc(prometheus.BuildFQName(namespace, "switch", "fan_status_info"),
"Infiniband switch fan status", []string{"guid", "status"}, nil),
"Infiniband switch fan status", []string{"guid", "switch", "status"}, nil),
FanRPM: prometheus.NewDesc(prometheus.BuildFQName(namespace, "switch", "fan_rpm"),
"Infiniband switch fan RPM", []string{"guid", "fan"}, nil),
"Infiniband switch fan RPM", []string{"guid", "switch", "fan"}, nil),
}
}

Expand All @@ -144,33 +144,33 @@ func (s *IbswinfoCollector) Collect(ch chan<- prometheus.Metric) {
for _, swinfo := range swinfos {
ch <- prometheus.MustNewConstMetric(s.HardwareInfo, prometheus.GaugeValue, 1, swinfo.device.GUID,
swinfo.FirmwareVersion, swinfo.PSID, swinfo.PartNumber, swinfo.SerialNumber, swinfo.device.Name)
ch <- prometheus.MustNewConstMetric(s.Uptime, prometheus.GaugeValue, swinfo.Uptime, swinfo.device.GUID)
ch <- prometheus.MustNewConstMetric(s.Duration, prometheus.GaugeValue, swinfo.duration, swinfo.device.GUID, s.collector)
ch <- prometheus.MustNewConstMetric(s.Error, prometheus.GaugeValue, swinfo.error, swinfo.device.GUID, s.collector)
ch <- prometheus.MustNewConstMetric(s.Timeout, prometheus.GaugeValue, swinfo.timeout, swinfo.device.GUID, s.collector)
ch <- prometheus.MustNewConstMetric(s.Uptime, prometheus.GaugeValue, swinfo.Uptime, swinfo.device.GUID, swinfo.device.Name)
ch <- prometheus.MustNewConstMetric(s.Duration, prometheus.GaugeValue, swinfo.duration, swinfo.device.GUID, swinfo.device.Name, s.collector)
ch <- prometheus.MustNewConstMetric(s.Error, prometheus.GaugeValue, swinfo.error, swinfo.device.GUID, swinfo.device.Name, s.collector)
ch <- prometheus.MustNewConstMetric(s.Timeout, prometheus.GaugeValue, swinfo.timeout, swinfo.device.GUID, swinfo.device.Name, s.collector)
for _, psu := range swinfo.PowerSupplies {
if psu.Status != "" {
ch <- prometheus.MustNewConstMetric(s.PowerSupplyStatus, prometheus.GaugeValue, 1, swinfo.device.GUID, psu.ID, psu.Status)
ch <- prometheus.MustNewConstMetric(s.PowerSupplyStatus, prometheus.GaugeValue, 1, swinfo.device.GUID, swinfo.device.Name, psu.ID, psu.Status)
}
if psu.DCPower != "" {
ch <- prometheus.MustNewConstMetric(s.PowerSupplyDCPower, prometheus.GaugeValue, 1, swinfo.device.GUID, psu.ID, psu.DCPower)
ch <- prometheus.MustNewConstMetric(s.PowerSupplyDCPower, prometheus.GaugeValue, 1, swinfo.device.GUID, swinfo.device.Name, psu.ID, psu.DCPower)
}
if psu.FanStatus != "" {
ch <- prometheus.MustNewConstMetric(s.PowerSupplyFanStatus, prometheus.GaugeValue, 1, swinfo.device.GUID, psu.ID, psu.FanStatus)
ch <- prometheus.MustNewConstMetric(s.PowerSupplyFanStatus, prometheus.GaugeValue, 1, swinfo.device.GUID, swinfo.device.Name, psu.ID, psu.FanStatus)
}
if !math.IsNaN(psu.PowerW) {
ch <- prometheus.MustNewConstMetric(s.PowerSupplyWatts, prometheus.GaugeValue, psu.PowerW, swinfo.device.GUID, psu.ID)
ch <- prometheus.MustNewConstMetric(s.PowerSupplyWatts, prometheus.GaugeValue, psu.PowerW, swinfo.device.GUID, swinfo.device.Name, psu.ID)
}
}
if !math.IsNaN(swinfo.Temp) {
ch <- prometheus.MustNewConstMetric(s.Temp, prometheus.GaugeValue, swinfo.Temp, swinfo.device.GUID)
ch <- prometheus.MustNewConstMetric(s.Temp, prometheus.GaugeValue, swinfo.Temp, swinfo.device.GUID, swinfo.device.Name)
}
if swinfo.FanStatus != "" {
ch <- prometheus.MustNewConstMetric(s.FanStatus, prometheus.GaugeValue, 1, swinfo.device.GUID, swinfo.FanStatus)
ch <- prometheus.MustNewConstMetric(s.FanStatus, prometheus.GaugeValue, 1, swinfo.device.GUID, swinfo.device.Name, swinfo.FanStatus)
}
for _, fan := range swinfo.Fans {
if !math.IsNaN(fan.RPM) {
ch <- prometheus.MustNewConstMetric(s.FanRPM, prometheus.GaugeValue, fan.RPM, swinfo.device.GUID, fan.ID)
ch <- prometheus.MustNewConstMetric(s.FanRPM, prometheus.GaugeValue, fan.RPM, swinfo.device.GUID, swinfo.device.Name, fan.ID)
}
}
}
Expand Down
78 changes: 39 additions & 39 deletions collectors/ibswinfo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,63 +223,63 @@ func TestIbswinfoCollector(t *testing.T) {
infiniband_exporter_collect_timeouts{collector="ibswinfo"} 0
# HELP infiniband_switch_fan_rpm Infiniband switch fan RPM
# TYPE infiniband_switch_fan_rpm gauge
infiniband_switch_fan_rpm{fan="1",guid="0x506b4b03005c2740"} 6125
infiniband_switch_fan_rpm{fan="1",guid="0x7cfe9003009ce5b0"} 8493
infiniband_switch_fan_rpm{fan="2",guid="0x506b4b03005c2740"} 5251
infiniband_switch_fan_rpm{fan="2",guid="0x7cfe9003009ce5b0"} 7349
infiniband_switch_fan_rpm{fan="3",guid="0x506b4b03005c2740"} 6013
infiniband_switch_fan_rpm{fan="3",guid="0x7cfe9003009ce5b0"} 8441
infiniband_switch_fan_rpm{fan="4",guid="0x506b4b03005c2740"} 5335
infiniband_switch_fan_rpm{fan="4",guid="0x7cfe9003009ce5b0"} 7270
infiniband_switch_fan_rpm{fan="5",guid="0x506b4b03005c2740"} 6068
infiniband_switch_fan_rpm{fan="5",guid="0x7cfe9003009ce5b0"} 8337
infiniband_switch_fan_rpm{fan="6",guid="0x506b4b03005c2740"} 5423
infiniband_switch_fan_rpm{fan="6",guid="0x7cfe9003009ce5b0"} 7156
infiniband_switch_fan_rpm{fan="7",guid="0x506b4b03005c2740"} 5854
infiniband_switch_fan_rpm{fan="7",guid="0x7cfe9003009ce5b0"} 8441
infiniband_switch_fan_rpm{fan="8",guid="0x506b4b03005c2740"} 5467
infiniband_switch_fan_rpm{fan="8",guid="0x7cfe9003009ce5b0"} 7232
infiniband_switch_fan_rpm{fan="9",guid="0x506b4b03005c2740"} 5906
infiniband_switch_fan_rpm{fan="1",guid="0x506b4b03005c2740",switch="ib-i4l1s01"} 6125
infiniband_switch_fan_rpm{fan="1",guid="0x7cfe9003009ce5b0",switch="ib-i1l1s01"} 8493
infiniband_switch_fan_rpm{fan="2",guid="0x506b4b03005c2740",switch="ib-i4l1s01"} 5251
infiniband_switch_fan_rpm{fan="2",guid="0x7cfe9003009ce5b0",switch="ib-i1l1s01"} 7349
infiniband_switch_fan_rpm{fan="3",guid="0x506b4b03005c2740",switch="ib-i4l1s01"} 6013
infiniband_switch_fan_rpm{fan="3",guid="0x7cfe9003009ce5b0",switch="ib-i1l1s01"} 8441
infiniband_switch_fan_rpm{fan="4",guid="0x506b4b03005c2740",switch="ib-i4l1s01"} 5335
infiniband_switch_fan_rpm{fan="4",guid="0x7cfe9003009ce5b0",switch="ib-i1l1s01"} 7270
infiniband_switch_fan_rpm{fan="5",guid="0x506b4b03005c2740",switch="ib-i4l1s01"} 6068
infiniband_switch_fan_rpm{fan="5",guid="0x7cfe9003009ce5b0",switch="ib-i1l1s01"} 8337
infiniband_switch_fan_rpm{fan="6",guid="0x506b4b03005c2740",switch="ib-i4l1s01"} 5423
infiniband_switch_fan_rpm{fan="6",guid="0x7cfe9003009ce5b0",switch="ib-i1l1s01"} 7156
infiniband_switch_fan_rpm{fan="7",guid="0x506b4b03005c2740",switch="ib-i4l1s01"} 5854
infiniband_switch_fan_rpm{fan="7",guid="0x7cfe9003009ce5b0",switch="ib-i1l1s01"} 8441
infiniband_switch_fan_rpm{fan="8",guid="0x506b4b03005c2740",switch="ib-i4l1s01"} 5467
infiniband_switch_fan_rpm{fan="8",guid="0x7cfe9003009ce5b0",switch="ib-i1l1s01"} 7232
infiniband_switch_fan_rpm{fan="9",guid="0x506b4b03005c2740",switch="ib-i4l1s01"} 5906
# HELP infiniband_switch_fan_status_info Infiniband switch fan status
# TYPE infiniband_switch_fan_status_info gauge
infiniband_switch_fan_status_info{guid="0x506b4b03005c2740",status="OK"} 1
infiniband_switch_fan_status_info{guid="0x7cfe9003009ce5b0",status="ERROR"} 1
infiniband_switch_fan_status_info{guid="0x506b4b03005c2740",status="OK",switch="ib-i4l1s01"} 1
infiniband_switch_fan_status_info{guid="0x7cfe9003009ce5b0",status="ERROR",switch="ib-i1l1s01"} 1
# HELP infiniband_switch_hardware_info Infiniband switch hardware info
# TYPE infiniband_switch_hardware_info gauge
infiniband_switch_hardware_info{firmware_version="11.2008.2102",guid="0x7cfe9003009ce5b0",part_number="MSB7790-ES2F",psid="MT_1880110032",serial_number="MT1943X00498",switch="ib-i1l1s01"} 1
infiniband_switch_hardware_info{firmware_version="27.2010.3118",guid="0x506b4b03005c2740",part_number="MQM8790-HS2F",psid="MT_0000000063",serial_number="MT2152T10239",switch="ib-i4l1s01"} 1
# HELP infiniband_switch_power_supply_dc_power_status_info Infiniband switch power supply DC power status
# TYPE infiniband_switch_power_supply_dc_power_status_info gauge
infiniband_switch_power_supply_dc_power_status_info{guid="0x506b4b03005c2740",psu="0",status="OK"} 1
infiniband_switch_power_supply_dc_power_status_info{guid="0x506b4b03005c2740",psu="1",status="OK"} 1
infiniband_switch_power_supply_dc_power_status_info{guid="0x7cfe9003009ce5b0",psu="0",status="OK"} 1
infiniband_switch_power_supply_dc_power_status_info{guid="0x7cfe9003009ce5b0",psu="1",status="OK"} 1
infiniband_switch_power_supply_dc_power_status_info{guid="0x506b4b03005c2740",psu="0",status="OK",switch="ib-i4l1s01"} 1
infiniband_switch_power_supply_dc_power_status_info{guid="0x506b4b03005c2740",psu="1",status="OK",switch="ib-i4l1s01"} 1
infiniband_switch_power_supply_dc_power_status_info{guid="0x7cfe9003009ce5b0",psu="0",status="OK",switch="ib-i1l1s01"} 1
infiniband_switch_power_supply_dc_power_status_info{guid="0x7cfe9003009ce5b0",psu="1",status="OK",switch="ib-i1l1s01"} 1
# HELP infiniband_switch_power_supply_fan_status_info Infiniband switch power supply fan status
# TYPE infiniband_switch_power_supply_fan_status_info gauge
infiniband_switch_power_supply_fan_status_info{guid="0x506b4b03005c2740",psu="0",status="OK"} 1
infiniband_switch_power_supply_fan_status_info{guid="0x506b4b03005c2740",psu="1",status="OK"} 1
infiniband_switch_power_supply_fan_status_info{guid="0x7cfe9003009ce5b0",psu="0",status="OK"} 1
infiniband_switch_power_supply_fan_status_info{guid="0x7cfe9003009ce5b0",psu="1",status="OK"} 1
infiniband_switch_power_supply_fan_status_info{guid="0x506b4b03005c2740",psu="0",status="OK",switch="ib-i4l1s01"} 1
infiniband_switch_power_supply_fan_status_info{guid="0x506b4b03005c2740",psu="1",status="OK",switch="ib-i4l1s01"} 1
infiniband_switch_power_supply_fan_status_info{guid="0x7cfe9003009ce5b0",psu="0",status="OK",switch="ib-i1l1s01"} 1
infiniband_switch_power_supply_fan_status_info{guid="0x7cfe9003009ce5b0",psu="1",status="OK",switch="ib-i1l1s01"} 1
# HELP infiniband_switch_power_supply_status_info Infiniband switch power supply status
# TYPE infiniband_switch_power_supply_status_info gauge
infiniband_switch_power_supply_status_info{guid="0x506b4b03005c2740",psu="0",status="OK"} 1
infiniband_switch_power_supply_status_info{guid="0x506b4b03005c2740",psu="1",status="OK"} 1
infiniband_switch_power_supply_status_info{guid="0x7cfe9003009ce5b0",psu="0",status="OK"} 1
infiniband_switch_power_supply_status_info{guid="0x7cfe9003009ce5b0",psu="1",status="OK"} 1
infiniband_switch_power_supply_status_info{guid="0x506b4b03005c2740",psu="0",status="OK",switch="ib-i4l1s01"} 1
infiniband_switch_power_supply_status_info{guid="0x506b4b03005c2740",psu="1",status="OK",switch="ib-i4l1s01"} 1
infiniband_switch_power_supply_status_info{guid="0x7cfe9003009ce5b0",psu="0",status="OK",switch="ib-i1l1s01"} 1
infiniband_switch_power_supply_status_info{guid="0x7cfe9003009ce5b0",psu="1",status="OK",switch="ib-i1l1s01"} 1
# HELP infiniband_switch_power_supply_watts Infiniband switch power supply watts
# TYPE infiniband_switch_power_supply_watts gauge
infiniband_switch_power_supply_watts{guid="0x506b4b03005c2740",psu="0"} 154
infiniband_switch_power_supply_watts{guid="0x506b4b03005c2740",psu="1"} 134
infiniband_switch_power_supply_watts{guid="0x7cfe9003009ce5b0",psu="0"} 72
infiniband_switch_power_supply_watts{guid="0x7cfe9003009ce5b0",psu="1"} 71
infiniband_switch_power_supply_watts{guid="0x506b4b03005c2740",psu="0",switch="ib-i4l1s01"} 154
infiniband_switch_power_supply_watts{guid="0x506b4b03005c2740",psu="1",switch="ib-i4l1s01"} 134
infiniband_switch_power_supply_watts{guid="0x7cfe9003009ce5b0",psu="0",switch="ib-i1l1s01"} 72
infiniband_switch_power_supply_watts{guid="0x7cfe9003009ce5b0",psu="1",switch="ib-i1l1s01"} 71
# HELP infiniband_switch_temperature_celsius Infiniband switch temperature celsius
# TYPE infiniband_switch_temperature_celsius gauge
infiniband_switch_temperature_celsius{guid="0x506b4b03005c2740"} 53
infiniband_switch_temperature_celsius{guid="0x7cfe9003009ce5b0"} 45
infiniband_switch_temperature_celsius{guid="0x506b4b03005c2740",switch="ib-i4l1s01"} 53
infiniband_switch_temperature_celsius{guid="0x7cfe9003009ce5b0",switch="ib-i1l1s01"} 45
# HELP infiniband_switch_uptime_seconds Infiniband switch uptime in seconds
# TYPE infiniband_switch_uptime_seconds gauge
infiniband_switch_uptime_seconds{guid="0x506b4b03005c2740"} 8301347
infiniband_switch_uptime_seconds{guid="0x7cfe9003009ce5b0"} 13862333
infiniband_switch_uptime_seconds{guid="0x506b4b03005c2740",switch="ib-i4l1s01"} 8301347
infiniband_switch_uptime_seconds{guid="0x7cfe9003009ce5b0",switch="ib-i1l1s01"} 13862333
`
collector := NewIbswinfoCollector(&switchDevices, false, log.NewNopLogger())
gatherers := setupGatherer(collector)
Expand Down
Loading