Skip to content

Commit ea9d866

Browse files
authored
Merge pull request #51 from NETWAYS/feature/snapshot
Add subcommand for snapshots
2 parents 4dc8764 + 39769b1 commit ea9d866

File tree

5 files changed

+348
-0
lines changed

5 files changed

+348
-0
lines changed

README.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,46 @@ check_elasticsearch ingest --pipeline foobar
130130
\_[OK] Failed ingest operations for foobar: 5; | pipelines.foobar.failed=5c
131131
```
132132

133+
### Snapshot
134+
135+
Checks the status of Elasticsearch snapshots.
136+
137+
```
138+
Checks the status of Elasticsearch snapshots
139+
The plugin maps snapshot status to the following exit codes:
140+
141+
SUCCESS, Exit code 0
142+
PARTIAL, Exit code 1
143+
FAILED, Exit code 2
144+
IN_PROGRESS, Exit code 3
145+
146+
If there are multiple snapshots the plugin uses the worst status
147+
148+
Usage:
149+
check_elasticsearch snapshot [flags]
150+
151+
Flags:
152+
-a, --all Check all retrieved snapshots. If not set only the latest snapshot is checked
153+
-N, --number int Check latest N number snapshots. If not set only the latest snapshot is checked (default 1)
154+
-r, --repository string Comma-separated list of snapshot repository names used to limit the request (default "*")
155+
-s, --snapshot string Comma-separated list of snapshot names to retrieve. Wildcard (*) expressions are supported (default "*")
156+
-h, --help help for snapshot
157+
```
158+
159+
Examples:
160+
161+
```
162+
$ check_elasticsearch snapshot
163+
[OK] - All evaluated snapshots are in state SUCCESS
164+
165+
$ check_elasticsearch snapshot --all -r myrepo
166+
[CRITICAL] - At least one evaluated snapshot is in state FAILED
167+
168+
$ check_elasticsearch snapshot --number 5 -s mysnapshot
169+
[WARNING] - At least one evaluated snapshot is in state PARTIAL
170+
```
171+
172+
133173
## License
134174

135175
Copyright (c) 2022 [NETWAYS GmbH](mailto:info@netways.de)

cmd/snapshot.go

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
package cmd
2+
3+
import (
4+
"fmt"
5+
"strings"
6+
7+
"github.com/NETWAYS/go-check"
8+
"github.com/NETWAYS/go-check/result"
9+
"github.com/spf13/cobra"
10+
)
11+
12+
var snapshotCmd = &cobra.Command{
13+
Use: "snapshot",
14+
Short: "Checks the status of Elasticsearch snapshots",
15+
Long: `Checks the status of Elasticsearch snapshots.
16+
The plugin maps snapshot status to the following exit codes:
17+
18+
SUCCESS, Exit code 0
19+
PARTIAL, Exit code 1
20+
FAILED, Exit code 2
21+
IN_PROGRESS, Exit code 3
22+
23+
If there are multiple snapshots the plugin uses the worst status.
24+
`,
25+
Example: `
26+
$ check_elasticsearch snapshot
27+
[OK] - All evaluated snapshots are in state SUCCESS
28+
29+
$ check_elasticsearch snapshot --all
30+
[CRITICAL] - At least one evaluated snapshot is in state FAILED
31+
32+
$ check_elasticsearch snapshot --number 5
33+
[WARNING] - At least one evaluated snapshot is in state PARTIAL
34+
`,
35+
Run: func(cmd *cobra.Command, args []string) {
36+
repository, _ := cmd.Flags().GetString("repository")
37+
snapshot, _ := cmd.Flags().GetString("snapshot")
38+
numberOfSnapshots, _ := cmd.Flags().GetInt("number")
39+
evalAllSnapshots, _ := cmd.Flags().GetBool("all")
40+
41+
var (
42+
rc int
43+
output string
44+
)
45+
46+
client := cliConfig.NewClient()
47+
48+
snapResponse, err := client.Snapshot(repository, snapshot)
49+
50+
if err != nil {
51+
check.ExitError(err)
52+
}
53+
54+
// If all snapshots are to be evaluated
55+
if evalAllSnapshots {
56+
numberOfSnapshots = len(snapResponse.Snapshots)
57+
}
58+
59+
// If more snapshots are requested than available
60+
if numberOfSnapshots > len(snapResponse.Snapshots) {
61+
numberOfSnapshots = len(snapResponse.Snapshots)
62+
}
63+
64+
// Evaluate snashots given their states
65+
sStates := make([]int, 0, len(snapResponse.Snapshots))
66+
67+
// Check status for each snapshot
68+
var summary strings.Builder
69+
70+
for _, snap := range snapResponse.Snapshots[:numberOfSnapshots] {
71+
72+
summary.WriteString("\n \\_")
73+
74+
switch snap.State {
75+
default:
76+
sStates = append(sStates, check.Unknown)
77+
summary.WriteString(fmt.Sprintf("[UNKNOWN] Snapshot: %s, State %s, Repository: %s", snap.Snapshot, snap.State, snap.Repository))
78+
case "SUCCESS":
79+
sStates = append(sStates, check.OK)
80+
summary.WriteString(fmt.Sprintf("[OK] Snapshot: %s, State %s, Repository: %s", snap.Snapshot, snap.State, snap.Repository))
81+
case "PARTIAL":
82+
sStates = append(sStates, check.Warning)
83+
summary.WriteString(fmt.Sprintf("[WARNING] Snapshot: %s, State %s, Repository: %s", snap.Snapshot, snap.State, snap.Repository))
84+
case "FAILED":
85+
sStates = append(sStates, check.Critical)
86+
summary.WriteString(fmt.Sprintf("[CRITICAL] Snapshot: %s, State %s, Repository: %s", snap.Snapshot, snap.State, snap.Repository))
87+
case "IN PROGRESS":
88+
sStates = append(sStates, check.Unknown)
89+
summary.WriteString(fmt.Sprintf("[UNKNOWN] Snapshot: %s, State %s, Repository: %s", snap.Snapshot, snap.State, snap.Repository))
90+
}
91+
}
92+
93+
rc = result.WorstState(sStates...)
94+
95+
switch rc {
96+
case check.OK:
97+
output = "All evaluated snapshots are in state SUCCESS."
98+
case check.Warning:
99+
output = "At least one evaluated snapshot is in state PARTIAL."
100+
case check.Critical:
101+
output = "At least one evaluated snapshot is in state FAILED."
102+
case check.Unknown:
103+
output = "At least one evaluated snapshot is in state IN_PROGRESS."
104+
default:
105+
output = "Could not evaluate status of snapshots"
106+
}
107+
108+
check.ExitRaw(rc, output, "repository:", repository, "snapshot:", snapshot, summary.String())
109+
},
110+
}
111+
112+
func init() {
113+
rootCmd.AddCommand(snapshotCmd)
114+
115+
fs := snapshotCmd.Flags()
116+
117+
fs.StringP("snapshot", "s", "*",
118+
"Comma-separated list of snapshot names to retrieve. Wildcard (*) expressions are supported")
119+
fs.StringP("repository", "r", "*",
120+
"Comma-separated list of snapshot repository names used to limit the request")
121+
122+
fs.IntP("number", "N", 1, "Check latest N number snapshots. If not set only the latest snapshot is checked")
123+
fs.BoolP("all", "a", false, "Check all retrieved snapshots. If not set only the latest snapshot is checked")
124+
125+
snapshotCmd.MarkFlagsMutuallyExclusive("number", "all")
126+
}

cmd/snapshot_test.go

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
package cmd
2+
3+
import (
4+
"net/http"
5+
"net/http/httptest"
6+
"net/url"
7+
"os/exec"
8+
"strings"
9+
"testing"
10+
)
11+
12+
// TestSnapshot_ConnectionRefused runs the snapshot subcommand against port
// 9999 (presumably nothing is listening there) and expects the fetch error
// to be reported as UNKNOWN.
func TestSnapshot_ConnectionRefused(t *testing.T) {
	const want = "[UNKNOWN] - could not fetch snapshots: Get \"http://localhost:9999/_snapshot/*/*?order=desc\": dial"

	// Only the printed output is compared, so the run error is not inspected.
	output, _ := exec.Command("go", "run", "../main.go", "snapshot", "--port", "9999").CombinedOutput()

	if got := string(output); !strings.Contains(got, want) {
		t.Error("\nActual: ", got, "\nExpected: ", want)
	}
}
24+
25+
// TestSnapshot_WithWrongFlags passes both --all and --number and expects the
// flag-group error to be reported as UNKNOWN.
func TestSnapshot_WithWrongFlags(t *testing.T) {
	const want = "[UNKNOWN] - if any flags in the group"

	// Only the printed output is compared, so the run error is not inspected.
	output, _ := exec.Command("go", "run", "../main.go", "snapshot", "--all", "--number", "9999").CombinedOutput()

	if got := string(output); !strings.Contains(got, want) {
		t.Error("\nActual: ", got, "\nExpected: ", want)
	}
}
37+
38+
// SnapshotTest is one table entry for the snapshot subcommand tests.
type SnapshotTest struct {
	// name is the subtest name passed to t.Run.
	name string
	// server is the stub HTTP server the command is pointed at.
	server *httptest.Server
	// args are the arguments handed to the "go" binary.
	args []string
	// expected is the text that must appear in the command output.
	expected string
}
44+
45+
func TestSnapshotCmd(t *testing.T) {
46+
tests := []SnapshotTest{
47+
{
48+
name: "no-snapshot",
49+
server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
50+
w.Header().Set("X-Elastic-Product", "Elasticsearch")
51+
w.WriteHeader(http.StatusOK)
52+
w.Write([]byte(`Hey dude where my snapshot`))
53+
})),
54+
args: []string{"run", "../main.go", "snapshot"},
55+
expected: "[UNKNOWN] - could not decode snapshot response",
56+
},
57+
{
58+
name: "snapshot-ok",
59+
server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
60+
w.Header().Set("X-Elastic-Product", "Elasticsearch")
61+
w.WriteHeader(http.StatusOK)
62+
w.Write([]byte(`{"snapshots":[{"snapshot":"snapshot_1","uuid":"dKb54xw67gvdRctLCxSket","repository":"my_repository","version_id":1.1,"version":1,"indices":[],"data_streams":[],"feature_states":[],"include_global_state":true,"state":"SUCCESS","start_time":"2020-07-06T21:55:18.129Z","start_time_in_millis":1593093628850,"end_time":"2020-07-06T21:55:18.129Z","end_time_in_millis":1593094752018,"duration_in_millis":0,"failures":[],"shards":{"total":0,"failed":0,"successful":0}},{"snapshot":"snapshot_2","uuid":"vdRctLCxSketdKb54xw67g","repository":"my_repository","version_id":2,"version":2,"indices":[],"data_streams":[],"feature_states":[],"include_global_state":true,"state":"SUCCESS","start_time":"2020-07-06T21:55:18.130Z","start_time_in_millis":1593093628851,"end_time":"2020-07-06T21:55:18.130Z","end_time_in_millis":1593094752019,"duration_in_millis":1,"failures":[],"shards":{"total":0,"failed":0,"successful":0}}],"next":"c25hcHNob3RfMixteV9yZXBvc2l0b3J5LHNuYXBzaG90XzI=","total":3,"remaining":1}
63+
`))
64+
})),
65+
args: []string{"run", "../main.go", "snapshot"},
66+
expected: "[OK] - All evaluated snapshots are in state SUCCESS",
67+
},
68+
{
69+
name: "snapshot-inprogress",
70+
server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
71+
w.Header().Set("X-Elastic-Product", "Elasticsearch")
72+
w.WriteHeader(http.StatusOK)
73+
w.Write([]byte(`{"snapshots":[{"snapshot":"snapshot_1","uuid":"dKb54xw67gvdRctLCxSket","repository":"my_repository","version_id":1,"version":1,"indices":[],"data_streams":[],"feature_states":[],"include_global_state":true,"state":"IN_PROGRESS","start_time":"2020-07-06T21:55:18.129Z","start_time_in_millis":1593093628850,"end_time":"2020-07-06T21:55:18.129Z","end_time_in_millis":1593094752018,"duration_in_millis":0,"failures":[],"shards":{"total":0,"failed":0,"successful":0}},{"snapshot":"snapshot_2","uuid":"vdRctLCxSketdKb54xw67g","repository":"my_repository","version_id":2,"version":2,"indices":[],"data_streams":[],"feature_states":[],"include_global_state":true,"state":"SUCCESS","start_time":"2020-07-06T21:55:18.130Z","start_time_in_millis":1593093628851,"end_time":"2020-07-06T21:55:18.130Z","end_time_in_millis":1593094752019,"duration_in_millis":1,"failures":[],"shards":{"total":0,"failed":0,"successful":0}}],"next":"c25hcHNob3RfMixteV9yZXBvc2l0b3J5LHNuYXBzaG90XzI=","total":3,"remaining":1}
74+
`))
75+
})),
76+
args: []string{"run", "../main.go", "snapshot"},
77+
expected: "[UNKNOWN] - At least one evaluated snapshot is in state IN_PROGRESS",
78+
},
79+
{
80+
name: "snapshot-failed-with-all",
81+
server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
82+
w.Header().Set("X-Elastic-Product", "Elasticsearch")
83+
w.WriteHeader(http.StatusOK)
84+
w.Write([]byte(`{"snapshots":[{"snapshot":"snapshot_1","uuid":"dKb54xw67gvdRctLCxSket","repository":"my_repository","version_id":1,"version":1,"indices":[],"data_streams":[],"feature_states":[],"include_global_state":true,"state":"IN_PROGRESS","start_time":"2020-07-06T21:55:18.129Z","start_time_in_millis":1593093628850,"end_time":"2020-07-06T21:55:18.129Z","end_time_in_millis":1593094752018,"duration_in_millis":0,"failures":[],"shards":{"total":0,"failed":0,"successful":0}},{"snapshot":"snapshot_2","uuid":"vdRctLCxSketdKb54xw67g","repository":"my_repository","version_id":2,"version":2,"indices":[],"data_streams":[],"feature_states":[],"include_global_state":true,"state":"FAILED","start_time":"2020-07-06T21:55:18.130Z","start_time_in_millis":1593093628851,"end_time":"2020-07-06T21:55:18.130Z","end_time_in_millis":1593094752019,"duration_in_millis":1,"failures":[],"shards":{"total":0,"failed":0,"successful":0}}],"next":"c25hcHNob3RfMixteV9yZXBvc2l0b3J5LHNuYXBzaG90XzI=","total":3,"remaining":1}
85+
`))
86+
})),
87+
args: []string{"run", "../main.go", "snapshot", "--all"},
88+
expected: "[CRITICAL] - At least one evaluated snapshot is in state FAILED",
89+
},
90+
{
91+
name: "snapshot-partial-with-number",
92+
server: httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
93+
w.Header().Set("X-Elastic-Product", "Elasticsearch")
94+
w.WriteHeader(http.StatusOK)
95+
w.Write([]byte(`{"snapshots":[{"snapshot":"snapshot_1","uuid":"dKb54xw67gvdRctLCxSket","repository":"my_repository","version_id":1,"version":1,"indices":[],"data_streams":[],"feature_states":[],"include_global_state":true,"state":"SUCCESS","start_time":"2020-07-06T21:55:18.129Z","start_time_in_millis":1593093628850,"end_time":"2020-07-06T21:55:18.129Z","end_time_in_millis":1593094752018,"duration_in_millis":0,"failures":[],"shards":{"total":0,"failed":0,"successful":0}},{"snapshot":"snapshot_2","uuid":"vdRctLCxSketdKb54xw67g","repository":"my_repository","version_id":2,"version":2,"indices":[],"data_streams":[],"feature_states":[],"include_global_state":true,"state":"PARTIAL","start_time":"2020-07-06T21:55:18.130Z","start_time_in_millis":1593093628851,"end_time":"2020-07-06T21:55:18.130Z","end_time_in_millis":1593094752019,"duration_in_millis":1,"failures":[],"shards":{"total":0,"failed":0,"successful":0}}],"next":"c25hcHNob3RfMixteV9yZXBvc2l0b3J5LHNuYXBzaG90XzI=","total":3,"remaining":1}
96+
`))
97+
})),
98+
args: []string{"run", "../main.go", "snapshot", "--number", "4"},
99+
expected: "[WARNING] - At least one evaluated snapshot is in state PARTIAL",
100+
},
101+
}
102+
103+
for _, test := range tests {
104+
t.Run(test.name, func(t *testing.T) {
105+
defer test.server.Close()
106+
107+
// We need the random Port extracted
108+
u, _ := url.Parse(test.server.URL)
109+
cmd := exec.Command("go", append(test.args, "--port", u.Port())...)
110+
out, _ := cmd.CombinedOutput()
111+
112+
actual := string(out)
113+
114+
if !strings.Contains(actual, test.expected) {
115+
t.Error("\nActual: ", actual, "\nExpected: ", test.expected)
116+
}
117+
118+
})
119+
}
120+
}

internal/client/client.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,3 +179,39 @@ func (c *Client) NodeStats() (r *es.ClusterStats, err error) {
179179

180180
return
181181
}
182+
183+
func (c *Client) Snapshot(repository string, snapshot string) (*es.SnapshotResponse, error) {
184+
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
185+
defer cancel()
186+
187+
r := &es.SnapshotResponse{}
188+
189+
u, _ := url.JoinPath(c.URL, "/_snapshot/", repository, snapshot)
190+
191+
// Retrieve snapshots in descending order to get latest
192+
req, err := http.NewRequestWithContext(ctx, http.MethodGet, u+"?order=desc", nil)
193+
194+
if err != nil {
195+
return r, fmt.Errorf("error creating request: %w", err)
196+
}
197+
198+
resp, err := c.Client.Do(req)
199+
200+
if err != nil {
201+
return r, fmt.Errorf("could not fetch snapshots: %w", err)
202+
}
203+
204+
if resp.StatusCode != http.StatusOK {
205+
return r, fmt.Errorf("request failed for snapshots: %s", resp.Status)
206+
}
207+
208+
defer resp.Body.Close()
209+
210+
err = json.NewDecoder(resp.Body).Decode(r)
211+
212+
if err != nil {
213+
return r, fmt.Errorf("could not decode snapshot response: %w", err)
214+
}
215+
216+
return r, nil
217+
}

internal/elasticsearch/api.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,3 +79,29 @@ type ClusterStats struct {
7979
Nodes map[string]NodeInfo `json:"nodes"`
8080
ClusterName string `json:"cluster_name"`
8181
}
82+
83+
// Snapshot is a single entry of the "snapshots" array in a snapshot response.
type Snapshot struct {
	Snapshot    string   `json:"snapshot"`
	UUID        string   `json:"uuid"`
	Repository  string   `json:"repository"`
	Indices     []string `json:"indices"`
	DataStreams []string `json:"data_streams"`
	// FeatureStates is decoded generically: Elasticsearch documents the
	// entries as JSON objects, which cannot be decoded into strings.
	FeatureStates      []interface{} `json:"feature_states"`
	IncludeGlobalState bool          `json:"include_global_state"`
	// State is the snapshot state, e.g. SUCCESS, PARTIAL, FAILED or IN_PROGRESS.
	State string `json:"state"`
	// The timestamps are epoch milliseconds; int64 keeps them decodable on
	// platforms where int is only 32 bits wide.
	StartTimeInMillis int64 `json:"start_time_in_millis"`
	EndTimeInMillis   int64 `json:"end_time_in_millis"`
	DurationInMillis  int64 `json:"duration_in_millis"`
	// Failures is decoded generically: Elasticsearch reports each failure as
	// a JSON object, so a []string would make decoding fail as soon as a
	// snapshot carries failure details.
	Failures []interface{} `json:"failures"`
	// Shards holds the shard counters reported for the snapshot.
	Shards struct {
		Total      int `json:"total"`
		Failed     int `json:"failed"`
		Successful int `json:"successful"`
	} `json:"shards"`
}
102+
103+
type SnapshotResponse struct {
104+
Snapshots []Snapshot `json:"snapshots"`
105+
Total int `json:"total"`
106+
Remaining int `json:"remaining"`
107+
}

0 commit comments

Comments
 (0)