Skip to content

Commit 24096b0

Browse files
Merge pull request #575 from replicatedhq/diamonwiggins/sc-40946/ceph-analyzer-health-status
Ceph health status messages in Analyzer result
2 parents c2136fc + cccc9f2 commit 24096b0

File tree

2 files changed

+73
-10
lines changed

2 files changed

+73
-10
lines changed

pkg/analyze/ceph.go

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,17 @@ type CephStatus struct {
7474
}
7575

7676
type HealthStatus struct {
77-
Status string `json:"status"`
77+
Status string `json:"status"`
78+
Checks map[string]CheckMessage `json:"checks"`
79+
}
80+
81+
type CheckMessage struct {
82+
Severity string `json:"severity"`
83+
Summary Summary `json:"summary"`
84+
}
85+
86+
type Summary struct {
87+
Message string `json:"message"`
7888
}
7989

8090
type OsdMap struct {
@@ -121,6 +131,7 @@ func cephStatus(analyzer *troubleshootv1beta2.CephStatusAnalyze, getCollectedFil
121131
if outcome.Fail.When == "" {
122132
outcome.Fail.When = string(CephHealthErr)
123133
}
134+
124135
match, err := compareCephStatus(status.Health.Status, outcome.Fail.When)
125136
if err != nil {
126137
return nil, errors.Wrap(err, "failed to compare ceph status")
@@ -134,6 +145,7 @@ func cephStatus(analyzer *troubleshootv1beta2.CephStatusAnalyze, getCollectedFil
134145
if outcome.Warn.When == "" {
135146
outcome.Warn.When = string(CephHealthWarn)
136147
}
148+
137149
match, err := compareCephStatus(status.Health.Status, outcome.Warn.When)
138150
if err != nil {
139151
return nil, errors.Wrap(err, "failed to compare ceph status")
@@ -147,13 +159,15 @@ func cephStatus(analyzer *troubleshootv1beta2.CephStatusAnalyze, getCollectedFil
147159
if outcome.Pass.When == "" {
148160
outcome.Pass.When = string(CephHealthOK)
149161
}
162+
150163
match, err := compareCephStatus(status.Health.Status, outcome.Pass.When)
151164
if err != nil {
152165
return nil, errors.Wrap(err, "failed to compare ceph status")
153166
} else if match {
154167
analyzeResult.IsPass = true
155168
analyzeResult.Message = outcome.Pass.Message
156169
analyzeResult.URI = outcome.Pass.URI
170+
157171
return analyzeResult, nil
158172
}
159173
}
@@ -195,21 +209,33 @@ func compareCephStatus(actual, when string) (bool, error) {
195209
}
196210
}
197211

198-
func detailedCephMessage(msg string, status CephStatus) string {
212+
func detailedCephMessage(outcomeMessage string, status CephStatus) string {
213+
var msg = []string{}
214+
215+
if outcomeMessage != "" {
216+
msg = append(msg, outcomeMessage)
217+
}
218+
199219
if status.OsdMap.OsdMap.NumOsd > 0 {
200-
msg = fmt.Sprintf("%s. %v/%v OSDs up", msg, status.OsdMap.OsdMap.NumUpOsd, status.OsdMap.OsdMap.NumOsd)
220+
msg = append(msg, fmt.Sprintf("%v/%v OSDs up", status.OsdMap.OsdMap.NumUpOsd, status.OsdMap.OsdMap.NumOsd))
201221
}
202222

203223
if status.OsdMap.OsdMap.Full {
204-
msg = fmt.Sprintf("%s. OSD disk is full", msg)
224+
msg = append(msg, fmt.Sprintf("OSD disk is full"))
205225
} else if status.OsdMap.OsdMap.NearFull {
206-
msg = fmt.Sprintf("%s. OSD disk is nearly full", msg)
226+
msg = append(msg, fmt.Sprintf("OSD disk is nearly full"))
207227
}
208228

209229
if status.PgMap.TotalBytes > 0 {
210230
pgUsage := 100 * float64(status.PgMap.UsedBytes) / float64(status.PgMap.TotalBytes)
211-
msg = fmt.Sprintf("%s. PG storage usage is %.1f%%.", msg, pgUsage)
231+
msg = append(msg, fmt.Sprintf("PG storage usage is %.1f%%", pgUsage))
232+
}
233+
234+
if status.Health.Checks != nil {
235+
for k, v := range status.Health.Checks {
236+
msg = append(msg, fmt.Sprintf("%s: %s", k, v.Summary.Message))
237+
}
212238
}
213239

214-
return msg
240+
return strings.Join(msg, "\n")
215241
}

pkg/analyze/ceph_test.go

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ func Test_cephStatus(t *testing.T) {
5656
IsWarn: true,
5757
IsFail: false,
5858
Title: "Ceph Status",
59-
Message: "Ceph status is HEALTH_WARN. 5/5 OSDs up. OSD disk is nearly full. PG storage usage is 85.0%.",
59+
Message: "Ceph status is HEALTH_WARN\n5/5 OSDs up\nOSD disk is nearly full\nPG storage usage is 85.0%",
6060
URI: "https://rook.io/docs/rook/v1.4/ceph-common-issues.html",
6161
IconKey: "rook",
6262
IconURI: "https://troubleshoot.sh/images/analyzer-icons/rook.svg?w=11&h=16",
@@ -89,7 +89,7 @@ func Test_cephStatus(t *testing.T) {
8989
IsWarn: false,
9090
IsFail: true,
9191
Title: "Ceph Status",
92-
Message: "Ceph status is HEALTH_ERR. 4/5 OSDs up. OSD disk is full. PG storage usage is 95.0%.",
92+
Message: "Ceph status is HEALTH_ERR\n4/5 OSDs up\nOSD disk is full\nPG storage usage is 95.0%",
9393
URI: "https://rook.io/docs/rook/v1.4/ceph-common-issues.html",
9494
IconKey: "rook",
9595
IconURI: "https://troubleshoot.sh/images/analyzer-icons/rook.svg?w=11&h=16",
@@ -172,7 +172,7 @@ func Test_cephStatus(t *testing.T) {
172172
IsWarn: false,
173173
IsFail: true,
174174
Title: "Ceph Status",
175-
Message: "custom message WARN. 5/5 OSDs up. OSD disk is nearly full. PG storage usage is 85.0%.",
175+
Message: "custom message WARN\n5/5 OSDs up\nOSD disk is nearly full\nPG storage usage is 85.0%",
176176
URI: "custom uri WARN",
177177
IconKey: "rook",
178178
IconURI: "https://troubleshoot.sh/images/analyzer-icons/rook.svg?w=11&h=16",
@@ -218,6 +218,43 @@ func Test_cephStatus(t *testing.T) {
218218
}
219219
}`,
220220
},
221+
{
222+
name: "warn case with multiple health status messages",
223+
analyzer: troubleshootv1beta2.CephStatusAnalyze{},
224+
expectResult: AnalyzeResult{
225+
IsPass: false,
226+
IsWarn: true,
227+
IsFail: false,
228+
Title: "Ceph Status",
229+
Message: "Ceph status is HEALTH_WARN\nPOOL_NO_REDUNDANCY: 11 pool(s) have no replicas configured\nPOOL_PG_NUM_NOT_POWER_OF_TWO: 8 pool(s) have non-power-of-two pg_num",
230+
URI: "https://rook.io/docs/rook/v1.4/ceph-common-issues.html",
231+
IconKey: "rook",
232+
IconURI: "https://troubleshoot.sh/images/analyzer-icons/rook.svg?w=11&h=16",
233+
},
234+
filePath: "ceph/status.json",
235+
file: `{
236+
"fsid": "96a8178c-6aa2-4adf-a309-9e8869a79611",
237+
"health": {
238+
"status": "HEALTH_WARN",
239+
"checks": {
240+
"POOL_NO_REDUNDANCY": {
241+
"severity": "HEALTH_WARN",
242+
"summary": {
243+
"message": "11 pool(s) have no replicas configured",
244+
"count": 11
245+
},
246+
"muted": false
247+
},
248+
"POOL_PG_NUM_NOT_POWER_OF_TWO": {
249+
"severity": "HEALTH_WARN",
250+
"summary": {
251+
"message": "8 pool(s) have non-power-of-two pg_num"
252+
}
253+
}
254+
}
255+
}
256+
}`,
257+
},
221258
}
222259

223260
for _, test := range tests {

0 commit comments

Comments (0)