Skip to content

Commit c9f70f5

Browse files
authored
unexpected node reboot (#383)
* test: add test for unexpected node reboot * test: add sts test * test: spawn debug containers in tmp namespace * chore: linting * test: force sts pod on another node * test: split pod / sts * test: make unexpected-reboot not concurrent * test: fix sts-pvc-unexpected-reboot name * test: assert NodeNotReady event * test: use wait instead of assert for pod ready
1 parent 0384567 commit c9f70f5

File tree

9 files changed

+497
-0
lines changed

9 files changed

+497
-0
lines changed
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
---
# Assertion manifest: expected status of the two CSI driver workloads in
# the kube-system namespace.
#
# The node DaemonSet must report as many available/ready pods as the
# ($nodes) expression evaluates to; that binding is not defined here and
# has to be provided by whatever loads this file.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: csi-linode-node
  namespace: kube-system
status:
  numberAvailable: ($nodes)
  numberReady: ($nodes)
---
# The controller StatefulSet must have exactly one available, ready replica.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: csi-linode-controller
  namespace: kube-system
status:
  availableReplicas: 1
  readyReplicas: 1
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
---
# Assertion manifest: the Pod named e2e-pod must be in phase Running and
# its e2e-pod container must be both started and ready.
apiVersion: v1
kind: Pod
metadata:
  name: e2e-pod
status:
  phase: Running
  containerStatuses:
    - name: e2e-pod
      started: true
      ready: true
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
---
# Assertion manifest, part 1: the Pod named e2e-pod must be in phase
# Running with its e2e-pod container started and ready.
apiVersion: v1
kind: Pod
metadata:
  name: e2e-pod
status:
  phase: Running
  containerStatuses:
    - name: e2e-pod
      started: true
      ready: true
---
# Assertion manifest, part 2: the pvc-filesystem claim must be Bound and
# report a capacity of 10Gi.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: pvc-filesystem
status:
  phase: Bound
  capacity:
    storage: 10Gi
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/kyverno/chainsaw/main/.schemas/json/test-chainsaw-v1alpha1.json
#
# Chainsaw test: a Pod with a CSI-backed PVC must come back, with its data
# intact, after the node it runs on is rebooted without warning.
#
# Flow: verify the CSI driver -> create StorageClass/PVC/Pod -> confirm the
# Linode volume exists -> write a marker file -> force-reboot the Pod's
# node -> wait for node and Pod to be Ready again -> confirm the marker
# file is still on the volume.
#
# NOTE(review): $namespace and $client are used below but not defined in
# this file; presumably they are supplied by Chainsaw - confirm.
apiVersion: chainsaw.kyverno.io/v1alpha1
kind: Test
metadata:
  creationTimestamp: null
  name: pod-pvc-unexpected-reboot
  labels:
    # Value-less labels; presumably used as test selectors - confirm.
    all:
    basic:
spec:
  # Not run in parallel with other tests (the test reboots a node).
  concurrent: false
  bindings:
    - name: nodes
      # number of nodes in cluster
      value: (length(x_k8s_list($client, 'v1', 'Node', '').items))
    - name: linode_url
      # API URL from the kube-system/linode Secret, falling back to the
      # public Linode API when the Secret has no apiurl key.
      value: (base64_decode(x_k8s_get($client, 'v1', 'Secret', 'kube-system', 'linode').data.apiurl || base64_encode('https://api.linode.com')))
    - name: linode_token
      value: (base64_decode(x_k8s_get($client, 'v1', 'Secret', 'kube-system', 'linode').data.token))
  steps:
    - name: Check if CSI Driver is deployed
      try:
        - assert:
            file: assert-csi-driver-resources.yaml
    - name: Create PVC and Pod
      try:
        - apply:
            file: create-pvc-pod.yaml
      catch:
        - describe:
            apiVersion: v1
            kind: Pod
        - describe:
            apiVersion: v1
            kind: PersistentVolumeClaim
    - name: Check if Pod is ready and Volume is mounted
      try:
        - assert:
            file: assert-pvc-pod.yaml
      catch:
        - describe:
            apiVersion: v1
            kind: PersistentVolumeClaim
        - describe:
            apiVersion: v1
            kind: Pod
    - name: Check if volume is created
      try:
        # Query the Linode API for volumes tagged with the test namespace
        # and expect exactly one result.
        - script:
            env:
              - name: TARGET_API
                value: ($linode_url)
              - name: TARGET_API_VERSION
                value: v4
              - name: URI
                value: volumes
              - name: LINODE_TOKEN
                value: ($linode_token)
              - name: FILTER
                value: (to_string({"tags":($namespace)}))
            content: |
              set -e
              curl -s \
                -H "Authorization: Bearer ${LINODE_TOKEN}" \
                -H "X-Filter: $FILTER" \
                -H "Content-Type: application/json" \
                "${TARGET_API}/${TARGET_API_VERSION}/${URI}"
            check:
              ($error): ~
              (json_parse($stdout)):
                results: 1
    - name: Create a file inside the pod and check it was created
      try:
        # Absolute /data paths (the volume's mountPath) are used so the
        # commands do not depend on the container's working directory.
        - script:
            env:
              - name: NAMESPACE
                value: ($namespace)
            content: |
              kubectl exec -n $NAMESPACE e2e-pod -- sh -c "touch /data/testfile" && \
                kubectl exec -n $NAMESPACE e2e-pod -- sh -c "ls /data"
            check:
              ($error): ~
              (contains($stdout, 'testfile')): true

    - name: Reboot the Node of the Pod e2e-pod
      try:
        # Look up which node the Pod was scheduled on.
        - script:
            env:
              - name: NAMESPACE
                value: ($namespace)
            content: |
              kubectl get pod e2e-pod -n $NAMESPACE -o jsonpath='{.spec.nodeName}'
            outputs:
              - name: nodeName
                value: ($stdout)

        # get bootid of the node
        - script:
            env:
              - name: NODE_NAME
                value: ($nodeName)
            content: |
              kubectl get node $NODE_NAME -o jsonpath='{.status.nodeInfo.bootID}'
            check:
              ($error): ~
            outputs:
              - name: bootId
                value: ($stdout)
        # Force an immediate reboot from a debug container chrooted into
        # the node's filesystem; the debug pod lives in the test namespace.
        - script:
            env:
              - name: NODE_NAME
                value: ($nodeName)
              - name: NAMESPACE
                value: ($namespace)
            content: |
              kubectl debug -n $NAMESPACE node/$NODE_NAME --profile=sysadmin --image=busybox -- chroot /host/ reboot --force
        # The node controller must report NodeNotReady for the Pod before
        # we start waiting for the node to be Ready again.
        - assert:
            resource:
              apiVersion: v1
              kind: Event
              reason: NodeNotReady
              source:
                component: node-controller
              involvedObject:
                apiVersion: v1
                kind: Pod
                name: e2e-pod
                namespace: ($namespace)
        - wait:
            apiVersion: v1
            kind: Node
            timeout: 120s
            name: ($nodeName)
            for:
              condition:
                name: Ready
                value: "true"
        # validate the bootid of the node changed
        - script:
            env:
              - name: NODE_NAME
                value: ($nodeName)
            content: |
              kubectl get node $NODE_NAME -o jsonpath='{.status.nodeInfo.bootID}'
            check:
              ($error): ~
              ($stdout != ($bootId)): true

    - name: Check if Pod is ready after reboot
      try:
        - wait:
            apiVersion: v1
            kind: Pod
            name: e2e-pod
            namespace: ($namespace)
            timeout: 120s
            for:
              condition:
                name: Ready
                value: "true"
        # The marker file written before the reboot must still be there.
        - script:
            env:
              - name: NAMESPACE
                value: ($namespace)
            content: |
              kubectl exec -n $NAMESPACE e2e-pod -- sh -c "ls /data"
            check:
              ($error): ~
              (contains($stdout, 'testfile')): true
      catch:
        - describe:
            apiVersion: v1
            kind: Pod
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
---
# StorageClass whose name is suffixed with the $namespace binding and which
# passes that same value to the provisioner as the volumeTags parameter.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: (join('-', ['linode-block-storage', $namespace]))
provisioner: linodebs.csi.linode.com
parameters:
  linodebs.csi.linode.com/volumeTags: (to_string($namespace))
allowVolumeExpansion: true
reclaimPolicy: Delete
volumeBindingMode: Immediate
---
# 10Gi ReadWriteOnce claim bound to the StorageClass declared above.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: pvc-filesystem
spec:
  storageClassName: (join('-', ['linode-block-storage', $namespace]))
  accessModes: [ReadWriteOnce]
  resources:
    requests:
      storage: 10Gi
---
# Long-sleeping Pod that mounts the claim at /data. The toleration lets it
# be scheduled on nodes carrying the control-plane NoSchedule taint.
apiVersion: v1
kind: Pod
metadata:
  name: e2e-pod
spec:
  tolerations:
    - key: node-role.kubernetes.io/control-plane
      operator: Exists
      effect: NoSchedule
  containers:
    - name: e2e-pod
      image: ubuntu
      command: [sleep, "1000000"]
      volumeMounts:
        - name: csi-volume
          mountPath: /data
  volumes:
    - name: csi-volume
      persistentVolumeClaim:
        claimName: pvc-filesystem
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
---
# Assertion manifest: expected status of the two CSI driver workloads in
# the kube-system namespace.
#
# The node DaemonSet must report as many available/ready pods as the
# ($nodes) expression evaluates to; that binding is not defined here and
# has to be provided by whatever loads this file.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: csi-linode-node
  namespace: kube-system
status:
  numberAvailable: ($nodes)
  numberReady: ($nodes)
---
# The controller StatefulSet must have exactly one available, ready replica.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: csi-linode-controller
  namespace: kube-system
status:
  availableReplicas: 1
  readyReplicas: 1
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
---
# Assertion manifest: the redis-test StatefulSet must have exactly one
# available, ready replica.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: redis-test
status:
  availableReplicas: 1
  readyReplicas: 1

0 commit comments

Comments
 (0)