Skip to content

Commit 73609c4

Browse files
authored
feat: add more detail to the ceph analyzer output (#445)
1 parent 977fc43 commit 73609c4

File tree

2 files changed

+129
-10
lines changed

2 files changed

+129
-10
lines changed

pkg/analyze/ceph.go

Lines changed: 47 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ package analyzer
22

33
import (
44
"encoding/json"
5+
"fmt"
56
"path"
67
"strings"
78

@@ -64,6 +65,30 @@ var CephStatusDefaultOutcomes = []*troubleshootv1beta2.Outcome{
6465
},
6566
}
6667

68+
type CephStatus struct {
69+
Health HealthStatus `json:"health"`
70+
OsdMap struct {
71+
OsdMap OsdMap `json:"osdmap"`
72+
} `json:"osdmap"`
73+
PgMap PgMap `json:"pgmap"`
74+
}
75+
76+
// HealthStatus holds the "status" string found in the "health" object of the
// Ceph status JSON (for example HEALTH_OK, HEALTH_WARN or HEALTH_ERR).
type HealthStatus struct {
	// Status is the raw health string as reported in the JSON.
	Status string `json:"status"`
}
79+
80+
// OsdMap carries the OSD counters and capacity flags decoded from the inner
// "osdmap" object of the Ceph status JSON.
type OsdMap struct {
	// NumOsd is read from "num_osds"; it is the denominator of the
	// "N/M OSDs up" detail in analyzer messages.
	NumOsd int `json:"num_osds"`
	// NumUpOsd is read from "num_up_osds"; it is the numerator of the
	// "N/M OSDs up" detail in analyzer messages.
	NumUpOsd int `json:"num_up_osds"`
	// Full is read from "full" and is reported as "OSD disk is full".
	Full bool `json:"full"`
	// NearFull is read from "nearfull" and is reported as
	// "OSD disk is nearly full" when Full is not set.
	NearFull bool `json:"nearfull"`
}
86+
87+
// PgMap carries the byte counters decoded from the "pgmap" object of the Ceph
// status JSON; together they yield the PG storage usage percentage.
type PgMap struct {
	// UsedBytes is read from "bytes_used".
	UsedBytes uint64 `json:"bytes_used"`
	// TotalBytes is read from "bytes_total".
	TotalBytes uint64 `json:"bytes_total"`
}
91+
6792
func cephStatus(analyzer *troubleshootv1beta2.CephStatusAnalyze, getCollectedFileContents func(string) ([]byte, error)) (*AnalyzeResult, error) {
6893
fileName := path.Join(collect.GetCephCollectorFilepath(analyzer.CollectorName, analyzer.Namespace), "status.json")
6994
collected, err := getCollectedFileContents(fileName)
@@ -82,11 +107,7 @@ func cephStatus(analyzer *troubleshootv1beta2.CephStatusAnalyze, getCollectedFil
82107
IconURI: "https://troubleshoot.sh/images/analyzer-icons/rook.svg?w=11&h=16",
83108
}
84109

85-
status := struct {
86-
Health struct {
87-
Status string `json:"status"`
88-
} `json:"health"`
89-
}{}
110+
status := CephStatus{}
90111
if err := json.Unmarshal(collected, &status); err != nil {
91112
return nil, errors.Wrap(err, "failed to unmarshal status.json")
92113
}
@@ -105,7 +126,7 @@ func cephStatus(analyzer *troubleshootv1beta2.CephStatusAnalyze, getCollectedFil
105126
return nil, errors.Wrap(err, "failed to compare ceph status")
106127
} else if match {
107128
analyzeResult.IsFail = true
108-
analyzeResult.Message = outcome.Fail.Message
129+
analyzeResult.Message = detailedCephMessage(outcome.Fail.Message, status)
109130
analyzeResult.URI = outcome.Fail.URI
110131
return analyzeResult, nil
111132
}
@@ -118,7 +139,7 @@ func cephStatus(analyzer *troubleshootv1beta2.CephStatusAnalyze, getCollectedFil
118139
return nil, errors.Wrap(err, "failed to compare ceph status")
119140
} else if match {
120141
analyzeResult.IsWarn = true
121-
analyzeResult.Message = outcome.Warn.Message
142+
analyzeResult.Message = detailedCephMessage(outcome.Warn.Message, status)
122143
analyzeResult.URI = outcome.Warn.URI
123144
return analyzeResult, nil
124145
}
@@ -173,3 +194,22 @@ func compareCephStatus(actual, when string) (bool, error) {
173194
return false, errors.New("unknown operator")
174195
}
175196
}
197+
198+
func detailedCephMessage(msg string, status CephStatus) string {
199+
if status.OsdMap.OsdMap.NumOsd > 0 {
200+
msg = fmt.Sprintf("%s. %v/%v OSDs up", msg, status.OsdMap.OsdMap.NumUpOsd, status.OsdMap.OsdMap.NumOsd)
201+
}
202+
203+
if status.OsdMap.OsdMap.Full {
204+
msg = fmt.Sprintf("%s. OSD disk is full", msg)
205+
} else if status.OsdMap.OsdMap.NearFull {
206+
msg = fmt.Sprintf("%s. OSD disk is nearly full", msg)
207+
}
208+
209+
if status.PgMap.TotalBytes > 0 {
210+
pgUsage := 100 * float64(status.PgMap.UsedBytes) / float64(status.PgMap.TotalBytes)
211+
msg = fmt.Sprintf("%s. PG storage usage is %.1f%%.", msg, pgUsage)
212+
}
213+
214+
return msg
215+
}

pkg/analyze/ceph_test.go

Lines changed: 82 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,18 @@ func Test_cephStatus(t *testing.T) {
3333
"fsid": "96a8178c-6aa2-4adf-a309-9e8869a79611",
3434
"health": {
3535
"status": "HEALTH_OK"
36+
},
37+
"osdmap": {
38+
"osdmap": {
39+
"num_osds": 5,
40+
"num_up_osds": 5,
41+
"full": false,
42+
"nearfull": false
43+
}
44+
},
45+
"pgmap": {
46+
"bytes_used": 10000,
47+
"bytes_total": 100000
3648
}
3749
}`,
3850
},
@@ -44,7 +56,7 @@ func Test_cephStatus(t *testing.T) {
4456
IsWarn: true,
4557
IsFail: false,
4658
Title: "Ceph Status",
47-
Message: "Ceph status is HEALTH_WARN",
59+
Message: "Ceph status is HEALTH_WARN. 5/5 OSDs up. OSD disk is nearly full. PG storage usage is 85.0%.",
4860
URI: "https://rook.io/docs/rook/v1.4/ceph-common-issues.html",
4961
IconKey: "rook",
5062
IconURI: "https://troubleshoot.sh/images/analyzer-icons/rook.svg?w=11&h=16",
@@ -54,6 +66,18 @@ func Test_cephStatus(t *testing.T) {
5466
"fsid": "96a8178c-6aa2-4adf-a309-9e8869a79611",
5567
"health": {
5668
"status": "HEALTH_WARN"
69+
},
70+
"osdmap": {
71+
"osdmap": {
72+
"num_osds": 5,
73+
"num_up_osds": 5,
74+
"full": false,
75+
"nearfull": true
76+
}
77+
},
78+
"pgmap": {
79+
"bytes_used": 85000,
80+
"bytes_total": 100000
5781
}
5882
}`,
5983
},
@@ -65,7 +89,7 @@ func Test_cephStatus(t *testing.T) {
6589
IsWarn: false,
6690
IsFail: true,
6791
Title: "Ceph Status",
68-
Message: "Ceph status is HEALTH_ERR",
92+
Message: "Ceph status is HEALTH_ERR. 4/5 OSDs up. OSD disk is full. PG storage usage is 95.0%.",
6993
URI: "https://rook.io/docs/rook/v1.4/ceph-common-issues.html",
7094
IconKey: "rook",
7195
IconURI: "https://troubleshoot.sh/images/analyzer-icons/rook.svg?w=11&h=16",
@@ -75,6 +99,18 @@ func Test_cephStatus(t *testing.T) {
7599
"fsid": "96a8178c-6aa2-4adf-a309-9e8869a79611",
76100
"health": {
77101
"status": "HEALTH_ERR"
102+
},
103+
"osdmap": {
104+
"osdmap": {
105+
"num_osds": 5,
106+
"num_up_osds": 4,
107+
"full": true,
108+
"nearfull": true
109+
}
110+
},
111+
"pgmap": {
112+
"bytes_used": 95000,
113+
"bytes_total": 100000
78114
}
79115
}`,
80116
},
@@ -98,6 +134,16 @@ func Test_cephStatus(t *testing.T) {
98134
"fsid": "96a8178c-6aa2-4adf-a309-9e8869a79611",
99135
"health": {
100136
"status": "HEALTH_OK"
137+
},
138+
"osdmap": {
139+
"osdmap": {
140+
"full": false,
141+
"nearfull": false
142+
}
143+
},
144+
"pgmap": {
145+
"bytes_used": 10000,
146+
"bytes_total": 100000
101147
}
102148
}`,
103149
},
@@ -126,12 +172,45 @@ func Test_cephStatus(t *testing.T) {
126172
IsWarn: false,
127173
IsFail: true,
128174
Title: "Ceph Status",
129-
Message: "custom message WARN",
175+
Message: "custom message WARN. 5/5 OSDs up. OSD disk is nearly full. PG storage usage is 85.0%.",
130176
URI: "custom uri WARN",
131177
IconKey: "rook",
132178
IconURI: "https://troubleshoot.sh/images/analyzer-icons/rook.svg?w=11&h=16",
133179
},
134180
filePath: "ceph/status.json",
181+
file: `{
182+
"fsid": "96a8178c-6aa2-4adf-a309-9e8869a79611",
183+
"health": {
184+
"status": "HEALTH_WARN"
185+
},
186+
"osdmap": {
187+
"osdmap": {
188+
"num_osds": 5,
189+
"num_up_osds": 5,
190+
"full": false,
191+
"nearfull": true
192+
}
193+
},
194+
"pgmap": {
195+
"bytes_used": 85000,
196+
"bytes_total": 100000
197+
}
198+
}`,
199+
},
200+
{
201+
name: "warn case with missing osd/pg data",
202+
analyzer: troubleshootv1beta2.CephStatusAnalyze{},
203+
expectResult: AnalyzeResult{
204+
IsPass: false,
205+
IsWarn: true,
206+
IsFail: false,
207+
Title: "Ceph Status",
208+
Message: "Ceph status is HEALTH_WARN",
209+
URI: "https://rook.io/docs/rook/v1.4/ceph-common-issues.html",
210+
IconKey: "rook",
211+
IconURI: "https://troubleshoot.sh/images/analyzer-icons/rook.svg?w=11&h=16",
212+
},
213+
filePath: "ceph/status.json",
135214
file: `{
136215
"fsid": "96a8178c-6aa2-4adf-a309-9e8869a79611",
137216
"health": {

0 commit comments

Comments
 (0)