Skip to content

Commit 1f42814

Browse files
committed
initial commit
0 parents  commit 1f42814

File tree

8 files changed

+641
-0
lines changed

8 files changed

+641
-0
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
.vscode
2+
.env

README.md

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# PodMonitoringTool
2+
3+
The PodMonitoringTool is a simple and lightweight alerting tool for pods on Kubernetes.
4+
There are already some great monitoring tools, such as [Prometheus](https://github.com/prometheus/prometheus).
5+
However, sometimes we don't need such complicated ones.
6+
7+
We usually check pod status quickly with a command like the one below.
8+
In this case, we can see that these pods are NOT ready.
9+
10+
```
11+
❯ kubectl get po
12+
NAME READY STATUS RESTARTS AGE
13+
nginx-68cbbbf874-k657d 0/1 Running 0 15m
14+
nginx2-7c9989c6-5g46w 0/1 Running 0 15m
15+
```
16+
17+
This tool allows us to monitor the status of pods in a specific namespace.
18+
Once this tool detects a pod failure, it sends an alert to MS Teams.
19+
20+
# Usage
21+
22+
## Configuration
23+
24+
**Set environment variables like below**
25+
26+
```
27+
NAMESPACE="default"
28+
TEAMS_ENDPOINT="https://outlook.office.com/webhook/XXXXXXXXXXXXX"
29+
OBSERVE_PERIOD=10
30+
```
31+
32+
## Launch
33+
34+
**Run this tool simply**
35+
36+
```
37+
❯ go run main.go
38+
2020/08/16 21:26:17 OBSERVE_PERIOD is 10
39+
2020/08/16 21:26:17 All Pod work succesfully
40+
2020/08/16 21:26:27 All Pod work succesfully
41+
```
42+
43+
## Verification
44+
45+
**The following manifests can be used to verify that this tool works successfully.**
46+
47+
```
48+
❯ kubectl apply -f .
49+
deployment.apps/nginx created
50+
deployment.apps/nginx2 created
51+
```
52+
53+
**You can then see that 2 pods are in a failed state.**
54+
55+
```
56+
❯ go run main.go
57+
2020/08/16 21:27:04 OBSERVE_PERIOD is 10
58+
2020/08/16 21:27:04 2 pods is not running
59+
2020/08/16 21:27:04 1
60+
2020/08/16 21:27:04 default nginx-68cbbbf874-k657d
61+
2020/08/16 21:27:04 2
62+
2020/08/16 21:27:04 default nginx2-7c9989c6-5g46w
63+
```
64+
65+
The alert can be found in your MS Teams like below.
66+
67+
![](teams_alert.png)

go.mod

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
module github.com/iaoiui/PodMonitorTool
2+
3+
go 1.14
4+
5+
require (
6+
github.com/imdario/mergo v0.3.11 // indirect
7+
github.com/stretchr/testify v1.6.1 // indirect
8+
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d // indirect
9+
golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e // indirect
10+
gopkg.in/yaml.v3 v3.0.0-20200605160147-a5ece683394c // indirect
11+
k8s.io/api v0.19.0-rc.3
12+
k8s.io/apimachinery v0.19.0-rc.3
13+
k8s.io/client-go v0.19.0-rc.3
14+
k8s.io/utils v0.0.0-20200815024018-e34d1aa459f9 // indirect
15+
)

go.sum

Lines changed: 336 additions & 0 deletions
Large diffs are not rendered by default.

main.go

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
package main
2+
3+
import (
	"context"
	"encoding/json"
	"flag"
	"fmt"
	log "log"
	"net/http"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	"k8s.io/client-go/tools/clientcmd"
)
21+
22+
// namespace which is observed
23+
var namespace string = getEnv("NAMESPACE", "default")
24+
25+
// observe period (sec)
26+
var observePeriod = 10
27+
28+
// Teams endpoint
29+
// !! Replace TEAMS_ENDPOINT like "https://outlook.office.com/webhook/XXXX" with your endpoint !!
30+
// TODO Erace Specific endpoint
31+
var teamsEndpoint string = getEnv("TEAMS_ENDPOINT", "")
32+
33+
// getEnv returns the value of the environment variable named key.
// When the variable is absent from the environment, fallback is returned
// instead; a variable that is present but empty yields "".
func getEnv(key, fallback string) string {
	value, found := os.LookupEnv(key)
	if !found {
		return fallback
	}
	return value
}
39+
40+
func main() {
41+
if teamsEndpoint == "" {
42+
log.Printf("TEAMS_ENDPOINT in not set\n")
43+
log.Fatal("please set TEAMS_ENDPOINT\n")
44+
return
45+
}
46+
observePeriod, err := strconv.Atoi(getEnv("OBSERVE_PERIOD", string(10)))
47+
if err != nil {
48+
log.Fatal(err)
49+
return
50+
}
51+
log.Println("OBSERVE_PERIOD is ", observePeriod)
52+
// get kubeConfig from Home Directory
53+
config := getKubeConfig()
54+
55+
// create the clientset
56+
clientset, err := kubernetes.NewForConfig(config)
57+
if err != nil {
58+
panic(err.Error())
59+
}
60+
for {
61+
listPod(clientset)
62+
63+
time.Sleep(time.Duration(observePeriod) * time.Second)
64+
}
65+
}
66+
67+
// homeDir returns the user's home directory as found in the environment:
// HOME when it is non-empty, otherwise USERPROFILE (the Windows equivalent).
// The result is empty when neither variable is set.
func homeDir() string {
	home := os.Getenv("HOME")
	if home == "" {
		home = os.Getenv("USERPROFILE") // windows
	}
	return home
}
73+
74+
func listPod(clientset *kubernetes.Clientset) {
75+
pods, err := clientset.CoreV1().Pods(namespace).List(context.TODO(), metav1.ListOptions{})
76+
if err != nil {
77+
panic(err.Error())
78+
}
79+
notReadyPods := getNotReadyPods(pods)
80+
81+
// There is no NotReady Pods
82+
if len(notReadyPods) == 0 {
83+
log.Println("All Pod work succesfully")
84+
} else {
85+
msg := generateAlertMsg(notReadyPods)
86+
sendAlertToTeams(msg)
87+
}
88+
89+
}
90+
91+
func generateAlertMsg(pods []v1.Pod) string {
92+
msg := ""
93+
log.Printf("%v pods is not running \n", len(pods))
94+
msg += fmt.Sprintf("# **%v pods is not running** \n", len(pods)) + "\n"
95+
for i, p := range pods {
96+
log.Println(i + 1)
97+
log.Println("\t", p.Namespace, "\t", p.Name, "\t")
98+
msg += fmt.Sprintln("\t Namespace: \t", p.Namespace, ", Pod: \t", p.Name) + "\n"
99+
}
100+
return msg
101+
}
102+
103+
// getNotReadyPods returns notReady Pods
104+
func getNotReadyPods(pods *v1.PodList) []v1.Pod {
105+
notReadyPods := []v1.Pod{}
106+
for _, p := range pods.Items {
107+
if p.Status.Phase != "Running" {
108+
// Pod is Not Ready
109+
notReadyPods = append(notReadyPods, p)
110+
} else {
111+
// Container is Not Ready
112+
if hasNotReadyContainer(p) {
113+
notReadyPods = append(notReadyPods, p)
114+
}
115+
}
116+
}
117+
return notReadyPods
118+
}
119+
120+
// Identify that all container inside given pod are Running
121+
func hasNotReadyContainer(p v1.Pod) bool {
122+
for _, containerStatus := range p.Status.ContainerStatuses {
123+
if containerStatus.Ready == false {
124+
return true
125+
}
126+
}
127+
return false
128+
}
129+
130+
func getKubeConfig() *rest.Config {
131+
var kubeconfig *string
132+
if home := homeDir(); home != "" {
133+
kubeconfig = flag.String("kubeconfig", filepath.Join(home, ".kube", "config"), "(optional) absolute path to the kubeconfig file")
134+
} else {
135+
kubeconfig = flag.String("kubeconfig", "", "absolute path to the kubeconfig file")
136+
}
137+
flag.Parse()
138+
139+
// use the current context in kubeconfig
140+
config, err := clientcmd.BuildConfigFromFlags("", *kubeconfig)
141+
if err != nil {
142+
panic(err.Error())
143+
}
144+
return config
145+
}
146+
147+
func sendAlertToTeams(msg string) {
148+
b := fmt.Sprintf(`{ "title": "Pod Defect Alert", "text": "%v"}`, msg)
149+
body := strings.NewReader(b)
150+
req, err := http.NewRequest("POST", teamsEndpoint, body)
151+
if err != nil {
152+
log.Println(err)
153+
return
154+
}
155+
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
156+
157+
resp, err := http.DefaultClient.Do(req)
158+
if err != nil {
159+
log.Println(err)
160+
return
161+
}
162+
defer resp.Body.Close()
163+
}

teams_alert.png

35 KB
Loading

test_pod.yaml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
creationTimestamp: null
5+
labels:
6+
app: nginx
7+
name: nginx
8+
spec:
9+
replicas: 1
10+
selector:
11+
matchLabels:
12+
app: nginx
13+
strategy: {}
14+
template:
15+
metadata:
16+
creationTimestamp: null
17+
labels:
18+
app: nginx
19+
spec:
20+
containers:
21+
- image: nginx
22+
name: nginx
23+
readinessProbe:
24+
httpGet:
25+
port: 8080
26+
path: /
27+
failureThreshold: 2
28+
periodSeconds: 5
29+
status: {}

test_pod2.yaml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
creationTimestamp: null
5+
labels:
6+
app: nginx2
7+
name: nginx2
8+
spec:
9+
replicas: 1
10+
selector:
11+
matchLabels:
12+
app: nginx2
13+
strategy: {}
14+
template:
15+
metadata:
16+
creationTimestamp: null
17+
labels:
18+
app: nginx2
19+
spec:
20+
containers:
21+
- image: nginx
22+
name: nginx
23+
readinessProbe:
24+
httpGet:
25+
port: 8080
26+
path: /
27+
failureThreshold: 2
28+
periodSeconds: 5
29+
status: {}

0 commit comments

Comments
 (0)