@@ -21,6 +21,43 @@ if ! command -v nvme >/dev/null 2>&1; then
2121 exit 1
2222fi
2323
# jq is used for every JSON lookup below, so refuse to run without it.
if ! command -v jq >/dev/null 2>&1; then
  # ${0##*/} strips the directory part so the message names just the script.
  echo "${0##*/}: jq is required but not installed. Aborting." >&2
  exit 1
fi
28+
# Path to the DWPD ratings JSON file
dwpd_file="/opt/kayobe/etc/monitoring/dwpd_ratings.json"

# Map of model name -> rated DWPD, filled in by load_dwpd_ratings.
# Initialised to an empty map so that it can be queried safely even when
# no ratings are loaded.
declare -A rated_dwpd=()
33+
#######################################
# Load per-model rated DWPD values from the ratings JSON file.
#
# The file is read as a JSON array of objects; each element's "model_name"
# and "rated_dwpd" members become one entry of the rated_dwpd map. Elements
# without a usable model name or rating are skipped, so that lookups for
# those models fall back to the caller's default.
#
# Globals:
#   dwpd_file   (read)    path of the ratings JSON file
#   rated_dwpd  (written) associative array: model name -> rated DWPD
# Outputs:
#   A warning on stderr when the ratings file does not exist.
# Returns:
#   0, including when the file is missing or cannot be parsed.
#######################################
load_dwpd_ratings() {
  local dwpd_json entry key value

  if [[ ! -f "$dwpd_file" ]]; then
    echo "Warning: DWPD ratings file not found at '$dwpd_file'. Defaulting to rated_dwpd=1." >&2
    return 0
  fi

  # Read the JSON; if it cannot be read or parsed, default to an empty array
  if ! dwpd_json="$(jq -c '.' "$dwpd_file")"; then
    dwpd_json='[]'
  fi

  # Iterate over each array element, one compact JSON value per line
  while IFS= read -r entry; do
    key="$(jq -r '.model_name' <<< "$entry")"
    value="$(jq -r '.rated_dwpd' <<< "$entry")"

    # Strip trailing whitespace only. The inner expansion isolates the
    # trailing run of whitespace, the outer one removes it, so model names
    # that contain spaces are kept whole.
    key="${key%"${key##*[![:space:]]}"}"
    value="${value%"${value##*[![:space:]]}"}"

    # jq -r prints "null" for a missing member; treat that like an absent one
    if [[ -z "$key" || "$key" == "null" ]]; then
      continue
    fi
    if [[ -z "$value" || "$value" == "null" ]]; then
      continue
    fi

    rated_dwpd["$key"]="$value"
  done < <(jq -c '.[]' <<< "$dwpd_json")

  return 0
}


load_dwpd_ratings
60+
2461output_format_awk=" $(
2562 cat << 'OUTPUTAWK '
2663BEGIN { v = "" }
@@ -44,58 +81,70 @@ format_output() {
# Report the nvme-cli version as an info-style metric: the version string is
# the third field of the line of `nvme version` whose first field is "nvme".
nvme_version="$(nvme version | awk '$1 == "nvme" {print $3}')"
echo "nvmecli{version=\"${nvme_version}\"} 1" | format_output
4683
# Get devices (DevicePath, PhysicalSize, ModelNumber and SerialNumber),
# one compact JSON object per line.
device_info="$(nvme list -o json | jq -c '.Devices[] | {DevicePath, PhysicalSize, ModelNumber, SerialNumber}')"

# Convert device_info to an array. Blank lines are dropped: a here-string
# always supplies at least one (possibly empty) line, and an empty element
# would otherwise be processed as if it were a device.
device_info_array=()
while IFS= read -r line; do
  if [[ -n "$line" ]]; then
    device_info_array+=("$line")
  fi
done <<< "$device_info"

# SMART log metrics emitted straight from the smart-log JSON, written as
# "<metric name> <jq filter>". The *_ratio metrics divide the raw smart-log
# value by 100.
smart_log_metrics=(
  "available_spare_ratio .avail_spare / 100"
  "available_spare_threshold_ratio .spare_thresh / 100"
  "percentage_used_ratio .percent_used / 100"
  "critical_warning_total .critical_warning"
  "media_errors_total .media_errors"
  "num_err_log_entries_total .num_err_log_entries"
  "power_cycles_total .power_cycles"
  "power_on_hours_total .power_on_hours"
  "controller_busy_time_seconds .controller_busy_time"
  "data_units_written_total .data_units_written"
  "data_units_read_total .data_units_read"
  "host_read_commands_total .host_read_commands"
  "host_write_commands_total .host_write_commands"
)

# Loop through the NVMe devices.
# The ${array[@]+...} form expands to nothing for an empty array, which keeps
# the loop safe under `set -u` on older bash releases.
for device_data in ${device_info_array[@]+"${device_info_array[@]}"}; do
  device="$(jq -r '.DevicePath' <<< "$device_data")"
  json_check="$(nvme smart-log -o json "${device}")"
  disk="${device##*/}"
  model_name="$(jq -r '.ModelNumber' <<< "$device_data")"
  serial_number="$(jq -r '.SerialNumber' <<< "$device_data")"
  physical_size="$(jq -r '.PhysicalSize' <<< "$device_data")"

  # Label set shared by every metric of this device
  labels="device=\"${disk}\",model=\"${model_name}\",serial_number=\"${serial_number}\""

  echo "physical_size_bytes{${labels}} ${physical_size}"

  # The temperature value in JSON is in Kelvin, we want Celsius
  value_temperature="$(jq '.temperature - 273' <<< "$json_check")"
  echo "temperature_celsius{${labels}} ${value_temperature}"

  # Get the rated DWPD from the dictionary or default to 1 if not found.
  # The lookup key has trailing whitespace removed so that it is compared in
  # the same form as the stored keys; an empty key is never used as a
  # subscript because bash rejects it.
  lookup_key="${model_name%"${model_name##*[![:space:]]}"}"
  if [[ -n "$lookup_key" ]]; then
    value_rated_dwpd="${rated_dwpd[$lookup_key]:-1}"
  else
    value_rated_dwpd=1
  fi
  echo "rated_dwpd{${labels}} ${value_rated_dwpd}"

  # Remaining metrics: split each table entry at its first space into the
  # metric name and the jq filter that extracts the value.
  for metric in "${smart_log_metrics[@]}"; do
    metric_name="${metric%% *}"
    metric_filter="${metric#* }"
    echo "${metric_name}{${labels}} $(jq "$metric_filter" <<< "$json_check")"
  done
done | format_output
0 commit comments