Skip to content

Commit d0559f8

Browse files
authored
ci: Improve CreateValidator and restartNetworkCmd reliability (#2181)
* ci: Improve CreateValidator and restartNetworkCmd reliability * lint fix * Addressing comments
1 parent 2a468c4 commit d0559f8

File tree

3 files changed

+46
-8
lines changed

3 files changed

+46
-8
lines changed

test/internal/k8sutils/utils.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,43 @@ func WaitForPodDeployment(ctx context.Context, clientset *kubernetes.Clientset,
267267
return errors.Wrapf(retrier.Do(ctx, checkPodDeploymentFn), "could not wait for deployment %s", deploymentName)
268268
}
269269

270+
func WaitForPodDaemonset(ctx context.Context, clientset *kubernetes.Clientset, namespace, daemonsetName, podLabelSelector string) error {
271+
podsClient := clientset.CoreV1().Pods(namespace)
272+
daemonsetClient := clientset.AppsV1().DaemonSets(namespace)
273+
checkPodDaemonsetFn := func() error {
274+
daemonset, err := daemonsetClient.Get(ctx, daemonsetName, metav1.GetOptions{})
275+
if err != nil {
276+
return errors.Wrapf(err, "could not get daemonset %s", daemonsetName)
277+
}
278+
279+
if daemonset.Status.NumberReady == 0 && daemonset.Status.DesiredNumberScheduled == 0 {
280+
// Capture daemonset restart. Restart sets every numerical status to 0.
281+
log.Printf("daemonset %s is in restart phase, no pods should be ready or scheduled", daemonsetName)
282+
return errors.New("daemonset did not set any pods to be scheduled")
283+
}
284+
285+
if daemonset.Status.NumberReady != daemonset.Status.DesiredNumberScheduled {
286+
// Provide real-time daemonset availability to console
287+
log.Printf("daemonset %s has %d pods in ready status, expected %d", daemonsetName, daemonset.Status.NumberReady, daemonset.Status.DesiredNumberScheduled)
288+
return errors.New("daemonset does not have the expected number of ready state pods")
289+
}
290+
291+
podList, err := podsClient.List(ctx, metav1.ListOptions{LabelSelector: podLabelSelector})
292+
if err != nil {
293+
return errors.Wrapf(err, "could not list pods with label selector %s", podLabelSelector)
294+
}
295+
296+
log.Printf("daemonset %s has %d pods in ready status, expected %d", daemonsetName, len(podList.Items), daemonset.Status.CurrentNumberScheduled)
297+
if len(podList.Items) != int(daemonset.Status.NumberReady) {
298+
return errors.New("some pods of the daemonset are still not ready")
299+
}
300+
return nil
301+
}
302+
303+
retrier := retry.Retrier{Attempts: RetryAttempts, Delay: RetryDelay}
304+
return errors.Wrapf(retrier.Do(ctx, checkPodDaemonsetFn), "could not wait for daemonset %s", daemonsetName)
305+
}
306+
270307
func MustUpdateReplica(ctx context.Context, deploymentsClient typedappsv1.DeploymentInterface, deploymentName string, replicas int32) error {
271308
deployment, err := deploymentsClient.Get(ctx, deploymentName, metav1.GetOptions{})
272309
if err != nil {

test/validate/linux_validate.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ const (
1515
)
1616

1717
var (
18-
restartNetworkCmd = []string{"bash", "-c", "chroot /host /bin/bash -c 'systemctl restart systemd-networkd'"}
18+
// The inner command must stay single-quoted: `bash -c` uses only the next
// word as the command string and assigns the remaining words to $0, $1, ...
// Without the quotes the chrooted shell would run a bare `systemctl` and
// never restart systemd-networkd.
restartNetworkCmd = []string{"bash", "-c", "chroot /host /bin/bash -c 'systemctl restart systemd-networkd'"}
1919
cnsStateFileCmd = []string{"bash", "-c", "cat /var/run/azure-cns/azure-endpoints.json"}
2020
azureVnetStateFileCmd = []string{"bash", "-c", "cat /var/run/azure-vnet.json"}
2121
ciliumStateFileCmd = []string{"bash", "-c", "cilium endpoint list -o json"}

test/validate/validate.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,18 +44,19 @@ type check struct {
4444
cmd []string
4545
}
4646

47-
func CreateValidator(ctx context.Context, clienset *kubernetes.Clientset, config *rest.Config, namespace, cni string, restartCase bool, os string) (*Validator, error) {
47+
func CreateValidator(ctx context.Context, clientset *kubernetes.Clientset, config *rest.Config, namespace, cni string, restartCase bool, os string) (*Validator, error) {
4848
// deploy privileged pod
4949
privilegedDaemonSet, err := k8sutils.MustParseDaemonSet(privilegedDaemonSetPathMap[os])
5050
if err != nil {
5151
return nil, errors.Wrap(err, "unable to parse daemonset")
5252
}
53-
daemonsetClient := clienset.AppsV1().DaemonSets(privilegedNamespace)
53+
daemonsetClient := clientset.AppsV1().DaemonSets(privilegedNamespace)
5454
if err := k8sutils.MustCreateDaemonset(ctx, daemonsetClient, privilegedDaemonSet); err != nil {
5555
return nil, errors.Wrap(err, "unable to create daemonset")
5656
}
57-
if err := k8sutils.WaitForPodsRunning(ctx, clienset, privilegedNamespace, privilegedLabelSelector); err != nil {
58-
return nil, errors.Wrap(err, "error while waiting for pods to be running")
57+
// Ensures that pods have been replaced if test is re-run after failure
58+
if err := k8sutils.WaitForPodDaemonset(ctx, clientset, privilegedNamespace, privilegedDaemonSet.Name, privilegedLabelSelector); err != nil {
59+
return nil, errors.Wrap(err, "unable to wait for daemonset")
5960
}
6061

6162
var checks []check
@@ -69,7 +70,7 @@ func CreateValidator(ctx context.Context, clienset *kubernetes.Clientset, config
6970
}
7071

7172
return &Validator{
72-
clientset: clienset,
73+
clientset: clientset,
7374
config: config,
7475
namespace: namespace,
7576
cni: cni,
@@ -124,7 +125,7 @@ func (v *Validator) ValidateRestartNetwork(ctx context.Context) error {
124125
// exec into the pod to get the state file
125126
_, err = k8sutils.ExecCmdOnPod(ctx, v.clientset, privilegedNamespace, privelegedPod.Name, restartNetworkCmd, v.config)
126127
if err != nil {
127-
return errors.Wrapf(err, "failed to exec into privileged pod")
128+
return errors.Wrapf(err, "failed to exec into privileged pod - %s", privelegedPod.Name)
128129
}
129130
err = k8sutils.WaitForPodsRunning(ctx, v.clientset, "", "")
130131
if err != nil {
@@ -151,7 +152,7 @@ func (v *Validator) validateIPs(ctx context.Context, stateFileIps stateFileIpsFu
151152
// exec into the pod to get the state file
152153
result, err := k8sutils.ExecCmdOnPod(ctx, v.clientset, namespace, podName, cmd, v.config)
153154
if err != nil {
154-
return errors.Wrapf(err, "failed to exec into privileged pod")
155+
return errors.Wrapf(err, "failed to exec into privileged pod - %s", podName)
155156
}
156157
filePodIps, err := stateFileIps(result)
157158
if err != nil {

0 commit comments

Comments
 (0)