Skip to content

Commit 3f1c159

Browse files
authored
ci: validate pods and systemd-networkd restart for PRs (#1909)
* update script to check cns in memory and add to pr pipeline * adding stage to both overlay and podsubnet cilium stages * add exit case if privileged pod is not found * check status of priv pod * call ds status before exit * install cilium ds with kubectl and not helm for systemd-networkd initcontainer patch * upload cilium ds * adding files for cilium-agent and cilium-operator deployment * update cilium ds * addressing comments
1 parent 08d3fb1 commit 3f1c159

File tree

12 files changed

+983
-12
lines changed

12 files changed

+983
-12
lines changed

.pipelines/singletenancy/cilium/cilium-e2e-step-template.yaml

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,13 @@ steps:
4545
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
4646
kubectl cluster-info
4747
kubectl get po -owide -A
48-
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
49-
chmod 700 get_helm.sh
50-
./get_helm.sh
5148
echo "deploy Cilium ConfigMap"
5249
kubectl apply -f cilium/configmap.yaml
50+
kubectl apply -f test/integration/manifests/cilium/cilium-config.yaml
5351
echo "install Cilium"
54-
helm repo add cilium https://helm.cilium.io/
55-
helm install cilium cilium/cilium --version 1.12.5 --namespace kube-system -f cilium/cilium_helm_values.yaml
52+
kubectl apply -f test/integration/manifests/cilium/cilium-agent
53+
kubectl apply -f test/integration/manifests/cilium/cilium-operator
54+
kubectl get po -owide -A
5655
name: "installCilium"
5756
displayName: "Install Cilium"
5857
@@ -108,6 +107,14 @@ steps:
108107
pathtoPublish: "$(Build.ArtifactStagingDirectory)/test-output"
109108
condition: always()
110109

110+
- script: |
111+
echo "validate pod IP assignment and check systemd-networkd restart"
112+
kubectl apply -f hack/manifests/hostprocess.yaml
113+
kubectl get pod -owide -A
114+
bash hack/scripts/validate_state.sh
115+
name: "validatePods"
116+
displayName: "Validate Pods"
117+
111118
- script: |
112119
echo "Run Service Conformance E2E"
113120
export PATH=${PATH}:/usr/local/bin/gsutil

.pipelines/singletenancy/overlay/overlay-e2e-step-template.yaml

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,13 @@ steps:
4545
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
4646
kubectl cluster-info
4747
kubectl get po -owide -A
48-
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
49-
chmod 700 get_helm.sh
50-
./get_helm.sh
5148
echo "deploy Cilium ConfigMap"
5249
kubectl apply -f cilium/configmap.yaml
50+
kubectl apply -f test/integration/manifests/cilium/cilium-config.yaml
5351
echo "install Cilium onto Overlay Cluster"
54-
helm repo add cilium https://helm.cilium.io/
55-
helm install cilium cilium/cilium --version 1.12.5 --namespace kube-system -f cilium/cilium_helm_values.yaml
52+
kubectl apply -f test/integration/manifests/cilium/cilium-agent
53+
kubectl apply -f test/integration/manifests/cilium/cilium-operator
54+
kubectl get po -owide -A
5655
name: "installCilium"
5756
displayName: "Install Cilium on AKS Overlay"
5857
@@ -114,6 +113,14 @@ steps:
114113
pathtoPublish: "$(Build.ArtifactStagingDirectory)/test-output"
115114
condition: always()
116115

116+
- script: |
117+
echo "validate pod IP assignment and check systemd-networkd restart"
118+
kubectl apply -f hack/manifests/hostprocess.yaml
119+
kubectl get pod -owide -A
120+
bash hack/scripts/validate_state.sh
121+
name: "validatePods"
122+
displayName: "Validate Pods"
123+
117124
- script: |
118125
echo "Run Service Conformance E2E"
119126
export PATH=${PATH}:/usr/local/bin/gsutil

hack/scripts/validate_state.sh

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ do
1717
echo "Node internal ip: $node_ip"
1818
privileged_pod=$(kubectl get pods -n kube-system -l app=privileged-daemonset -o wide | grep "$node_name" | awk '{print $1}')
1919
echo "privileged pod : $privileged_pod"
20+
if [ "$privileged_pod" == '' ]; then
21+
kubectl describe daemonset privileged-daemonset -n kube-system
22+
exit 1
23+
fi
2024
while ! [ -s "azure_endpoints.json" ]
2125
do
2226
echo "trying to get the azure_endpoints"
@@ -34,6 +38,16 @@ do
3438
sleep 10
3539
done
3640

41+
cns_pod=$(kubectl get pod -l k8s-app=azure-cns -n kube-system -o wide | grep "$node_name" | awk '{print $1}')
42+
echo "azure-cns pod : $cns_pod"
43+
44+
while ! [ -s "cns_endpoints.json" ]
45+
do
46+
echo "trying to get the cns_endpoints"
47+
kubectl exec -it "$cns_pod" -n kube-system -- curl localhost:10090/debug/ipaddresses -d '{"IPConfigStateFilter":["Assigned"]}' > cns_endpoints.json
48+
sleep 10
49+
done
50+
3751
total_pods=$(kubectl get pods --all-namespaces -o wide --field-selector spec.nodeName="$node_name",status.phase=Running --output json)
3852

3953
echo "Checking if there are any pods with no ips"
@@ -60,7 +74,7 @@ do
6074
echo "Number of azure endpoint ips : $num_of_azure_endpoint_ips"
6175

6276
if [ "$num_of_pod_ips" != "$num_of_azure_endpoint_ips" ]; then
63-
printf "Error: Number of pods in running state is less than total ips in the azure ednpoint file" >&2
77+
printf "Error: Number of pods in running state is less than total ips in the azure endpoint file" >&2
6478
exit 1
6579
fi
6680

@@ -92,7 +106,25 @@ do
92106
fi
93107
done
94108

109+
num_of_cns_endpoints=$(cat cns_endpoints.json | jq -r '[.IPConfigurationStatus | .[] | select(.IPAddress != null)] | length')
110+
cns_endpoint_ips=$(cat cns_endpoints.json | jq -r '(.IPConfigurationStatus | .[] | select(.IPAddress != null) | .IPAddress)')
111+
echo "Number of cns endpoints: $num_of_cns_endpoints"
112+
113+
if [ "$num_of_pod_ips" != "$num_of_cns_endpoints" ]; then
114+
printf "Error: Number of pods in running state is less than total ips in the cns endpoint file" >&2
115+
exit 1
116+
fi
117+
118+
for ip in "${pod_ips[@]}"
119+
do
120+
find_in_array "$cns_endpoint_ips" "$ip" "cns_endpoints.json"
121+
if [[ $? -eq 1 ]]; then
122+
printf "Error: %s Not found in the cns_endpoints.json" "$ip" >&2
123+
exit 1
124+
fi
125+
done
126+
95127
# We restart systemd-networkd and check that connectivity still works after the restart. For more details: https://github.com/cilium/cilium/issues/18706
96128
kubectl exec -i "$privileged_pod" -n kube-system -- bash -c "chroot /host /bin/bash -c 'systemctl restart systemd-networkd'"
97-
rm -rf cilium_endpoints.json azure_endpoints.json
129+
rm -rf cilium_endpoints.json azure_endpoints.json cns_endpoints.json
98130
done
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
apiVersion: rbac.authorization.k8s.io/v1
2+
kind: ClusterRole
3+
metadata:
4+
name: cilium
5+
rules:
6+
- apiGroups:
7+
- networking.k8s.io
8+
resources:
9+
- networkpolicies
10+
verbs:
11+
- get
12+
- list
13+
- watch
14+
- apiGroups:
15+
- discovery.k8s.io
16+
resources:
17+
- endpointslices
18+
verbs:
19+
- get
20+
- list
21+
- watch
22+
- apiGroups:
23+
- ""
24+
resources:
25+
- namespaces
26+
- services
27+
- pods
28+
- endpoints
29+
- nodes
30+
verbs:
31+
- get
32+
- list
33+
- watch
34+
- apiGroups:
35+
- apiextensions.k8s.io
36+
resources:
37+
- customresourcedefinitions
38+
verbs:
39+
- list
40+
- watch
41+
# This is used when validating policies in preflight. This will need to stay
42+
# until we figure out how to avoid "get" inside the preflight, and then
43+
# should be removed ideally.
44+
- get
45+
- apiGroups:
46+
- cilium.io
47+
resources:
48+
- ciliumbgploadbalancerippools
49+
- ciliumbgppeeringpolicies
50+
- ciliumclusterwideenvoyconfigs
51+
- ciliumclusterwidenetworkpolicies
52+
- ciliumegressgatewaypolicies
53+
- ciliumegressnatpolicies
54+
- ciliumendpoints
55+
- ciliumendpointslices
56+
- ciliumenvoyconfigs
57+
- ciliumidentities
58+
- ciliumlocalredirectpolicies
59+
- ciliumnetworkpolicies
60+
- ciliumnodes
61+
verbs:
62+
- list
63+
- watch
64+
- apiGroups:
65+
- cilium.io
66+
resources:
67+
- ciliumidentities
68+
- ciliumendpoints
69+
- ciliumnodes
70+
verbs:
71+
- create
72+
- apiGroups:
73+
- cilium.io
74+
resources:
75+
- ciliumendpoints
76+
verbs:
77+
- delete
78+
- get
79+
- apiGroups:
80+
- cilium.io
81+
resources:
82+
- ciliumnodes
83+
- ciliumnodes/status
84+
verbs:
85+
- get
86+
- update
87+
- apiGroups:
88+
- cilium.io
89+
resources:
90+
- ciliumnetworkpolicies/status
91+
- ciliumclusterwidenetworkpolicies/status
92+
- ciliumendpoints/status
93+
- ciliumendpoints
94+
verbs:
95+
- patch
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
apiVersion: rbac.authorization.k8s.io/v1
2+
kind: ClusterRoleBinding
3+
metadata:
4+
name: cilium
5+
roleRef:
6+
apiGroup: rbac.authorization.k8s.io
7+
kind: ClusterRole
8+
name: cilium
9+
subjects:
10+
- kind: ServiceAccount
11+
name: "cilium"
12+
namespace: kube-system

0 commit comments

Comments
 (0)