
Commit 7369ccc

make time sync e2e test more reliable for dynamic infra
1 parent b6b38b0 commit 7369ccc

File tree

3 files changed (+127 -43 lines)


test/e2e/azure_timesync.go

Lines changed: 40 additions & 30 deletions
@@ -22,8 +22,10 @@ import (
 	"context"
 	"fmt"
 	"strings"
+	"time"

 	. "github.com/onsi/gomega"
+	"github.com/pkg/errors"
 	corev1 "k8s.io/api/core/v1"
 	"sigs.k8s.io/cluster-api/test/framework"
 	kinderrors "sigs.k8s.io/kind/pkg/errors"
@@ -42,6 +44,7 @@ func AzureTimeSyncSpec(ctx context.Context, inputGetter func() AzureTimeSyncSpec
 	var (
 		specName = "azure-timesync"
 		input    AzureTimeSyncSpecInput
+		thirtySeconds = 30*time.Second
 	)

 	input = inputGetter()
@@ -50,40 +53,47 @@ func AzureTimeSyncSpec(ctx context.Context, inputGetter func() AzureTimeSyncSpec
 	namespace, name := input.Namespace.Name, input.ClusterName
 	managementClusterClient := input.BootstrapClusterProxy.GetClient()

-	sshInfo, err := getClusterSSHInfo(ctx, managementClusterClient, namespace, name)
-	Expect(err).NotTo(HaveOccurred())
-	Expect(len(sshInfo)).To(BeNumerically(">", 0))
+	Eventually(func() error {
+		sshInfo, err := getClusterSSHInfo(ctx, managementClusterClient, namespace, name)
+		if err != nil {
+			return err
+		}

-	testfuncs := []func() error{}
-	for _, s := range sshInfo {
-		Byf("checking that time synchronization is healthy on %s", s.Hostname)
+		if len(sshInfo) <= 0 {
+			return errors.New("sshInfo did not contain any machines")
+		}

-		execToStringFn := func(expected, command string, args ...string) func() error {
-			// don't assert in this test func, just return errors
-			return func() error {
-				f := &strings.Builder{}
-				if err := execOnHost(s.Endpoint, s.Hostname, s.Port, f, command, args...); err != nil {
-					return err
+		var testFuncs []func() error
+		for _, s := range sshInfo {
+			Byf("checking that time synchronization is healthy on %s", s.Hostname)
+
+			execToStringFn := func(expected, command string, args ...string) func() error {
+				// don't assert in this test func, just return errors
+				return func() error {
+					f := &strings.Builder{}
+					if err := execOnHost(s.Endpoint, s.Hostname, s.Port, f, command, args...); err != nil {
+						return err
+					}
+					if !strings.Contains(f.String(), expected) {
+						return fmt.Errorf("expected \"%s\" in command output:\n%s", expected, f.String())
+					}
+					return nil
 				}
-				if !strings.Contains(f.String(), expected) {
-					return fmt.Errorf("expected \"%s\" in command output:\n%s", expected, f.String())
-				}
-				return nil
 			}
+
+			testFuncs = append(testFuncs,
+				execToStringFn(
+					"✓ chronyd is active",
+					"systemctl", "is-active", "chronyd", "&&",
+					"echo", "✓ chronyd is active",
+				),
+				execToStringFn(
+					"Reference ID",
+					"chronyc", "tracking",
+				),
+			)
 		}

-		testfuncs = append(testfuncs,
-			execToStringFn(
-				"✓ chronyd is active",
-				"systemctl", "is-active", "chronyd", "&&",
-				"echo", "✓ chronyd is active",
-			),
-			execToStringFn(
-				"Reference ID",
-				"chronyc", "tracking",
-			),
-		)
-	}
-
-	Expect(kinderrors.AggregateConcurrent(testfuncs)).To(Succeed())
+		return kinderrors.AggregateConcurrent(testFuncs)
+	}, thirtySeconds, thirtySeconds).Should(Succeed())
 }
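
The reliability change here is that the SSH-info lookup and the chrony checks now run inside Gomega's Eventually, so a transient failure (for example, machine pool instances that have not yet registered as nodes) is retried instead of failing the spec immediately. Below is a minimal, self-contained sketch of that retry pattern; the probe function, attempt counter, and intervals are illustrative placeholders, not code from this commit.

package e2e

import (
	"errors"
	"testing"
	"time"

	. "github.com/onsi/gomega"
	kinderrors "sigs.k8s.io/kind/pkg/errors"
)

// TestEventuallyRetriesChecks shows the pattern used above: build the list of
// check functions and run them inside Eventually, so both the lookup and the
// checks are re-attempted until the timeout expires.
func TestEventuallyRetriesChecks(t *testing.T) {
	g := NewWithT(t)

	attempts := 0
	g.Eventually(func() error {
		attempts++
		// Simulate the SSH-info lookup returning nothing on the first poll,
		// as it can while infrastructure is still converging.
		if attempts < 2 {
			return errors.New("sshInfo did not contain any machines")
		}

		checks := []func() error{
			func() error { return nil }, // stands in for the "chronyd is active" check
			func() error { return nil }, // stands in for the "chronyc tracking" check
		}
		// Run the checks concurrently and aggregate any failures into one error.
		return kinderrors.AggregateConcurrent(checks)
	}, 30*time.Second, 1*time.Second).Should(Succeed())
}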

test/e2e/config/azure-dev.yaml

Lines changed: 3 additions & 3 deletions
@@ -51,6 +51,9 @@ providers:
     value: "${PWD}/config/default"
     files:
     - sourcePath: "../data/shared/v1alpha4_provider/metadata.yaml"
+    replacements:
+    - old: "--v=0"
+      new: "--v=4"
   files:
   - sourcePath: "../data/shared/v1alpha4/metadata.yaml"
   - sourcePath: "${PWD}/templates/test/ci/cluster-template-prow.yaml"
@@ -83,9 +86,6 @@ providers:
     targetName: "cluster-template-machine-pool-windows.yaml"
   - sourcePath: "${PWD}/templates/test/ci/cluster-template-prow-external-cloud-provider.yaml"
     targetName: "cluster-template-external-cloud-provider.yaml"
-    replacements:
-    - old: "--v=0"
-      new: "--v=4"

 variables:
   KUBERNETES_VERSION: "${KUBERNETES_VERSION:-v1.19.7}"
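
For context, azure-dev.yaml is the clusterctl e2e configuration consumed by the test suite; a provider's replacements entries rewrite manager arguments (here raising log verbosity from --v=0 to --v=4) when the provider's local repository is assembled for the run. A rough sketch of how such a config is typically loaded, assuming the standard CAPI test framework helper and an illustrative path:

package e2e

import (
	"context"

	"sigs.k8s.io/cluster-api/test/framework/clusterctl"
)

// loadAzureE2EConfig sketches how a suite loads this configuration; the file
// path below is an assumption for illustration.
func loadAzureE2EConfig(ctx context.Context) *clusterctl.E2EConfig {
	// Parses the providers, files, replacements, and variables defined in
	// azure-dev.yaml and returns them as an E2EConfig.
	return clusterctl.LoadE2EConfig(ctx, clusterctl.LoadE2EConfigInput{
		ConfigPath: "test/e2e/config/azure-dev.yaml",
	})
}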

test/e2e/helpers.go

Lines changed: 84 additions & 10 deletions
@@ -41,17 +41,22 @@ import (
 	appsv1 "k8s.io/api/apps/v1"
 	batchv1 "k8s.io/api/batch/v1"
 	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/client-go/kubernetes"
 	typedappsv1 "k8s.io/client-go/kubernetes/typed/apps/v1"
 	typedbatchv1 "k8s.io/client-go/kubernetes/typed/batch/v1"
 	typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
+	"k8s.io/client-go/tools/clientcmd"
 	"sigs.k8s.io/cluster-api-provider-azure/azure"
+	"sigs.k8s.io/cluster-api-provider-azure/util/tele"
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha4"
+	"sigs.k8s.io/cluster-api/controllers/noderefutil"
 	clusterv1exp "sigs.k8s.io/cluster-api/exp/api/v1alpha4"
 	"sigs.k8s.io/cluster-api/test/framework"
 	"sigs.k8s.io/cluster-api/test/framework/kubernetesversions"
 	"sigs.k8s.io/cluster-api/util"
+	utilkubeconfig "sigs.k8s.io/cluster-api/util/kubeconfig"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 )

@@ -311,18 +316,17 @@ type nodeSSHInfo struct {
 // getClusterSSHInfo returns the information needed to establish a SSH connection through a
 // control plane endpoint to each node in the cluster.
 func getClusterSSHInfo(ctx context.Context, c client.Client, namespace, name string) ([]nodeSSHInfo, error) {
-	sshInfo := []nodeSSHInfo{}
-
+	var sshInfo []nodeSSHInfo
 	// Collect the info for each VM / Machine.
 	machines, err := getMachinesInCluster(ctx, c, namespace, name)
 	if err != nil {
-		return nil, err
+		return sshInfo, errors.Wrap(err, "failed to get machines in the cluster")
 	}
 	for i := range machines.Items {
 		m := &machines.Items[i]
 		cluster, err := util.GetClusterFromMetadata(ctx, c, m.ObjectMeta)
 		if err != nil {
-			return nil, err
+			return nil, errors.Wrap(err, "failed to get cluster from metadata")
 		}
 		sshInfo = append(sshInfo, nodeSSHInfo{
 			Endpoint: cluster.Spec.ControlPlaneEndpoint.Host,
@@ -334,28 +338,98 @@ func getClusterSSHInfo(ctx context.Context, c client.Client, namespace, name str
 	// Collect the info for each instance in a VMSS / MachinePool.
 	machinePools, err := getMachinePoolsInCluster(ctx, c, namespace, name)
 	if err != nil {
-		return nil, err
+		return sshInfo, errors.Wrap(err, "failed to find machine pools in cluster")
+	}
+
+	// make a workload client to access the workload cluster
+	workloadClient, err := getWorkloadClient(ctx, c, namespace, name)
+	if err != nil {
+		return sshInfo, errors.Wrap(err, "failed to get workload client")
 	}
+
 	for i := range machinePools.Items {
 		p := &machinePools.Items[i]
 		cluster, err := util.GetClusterFromMetadata(ctx, c, p.ObjectMeta)
 		if err != nil {
-			return nil, err
+			return sshInfo, errors.Wrap(err, "failed to get cluster from metadata")
+		}
+
+		nodes, err := getReadyNodes(ctx, workloadClient, p.Status.NodeRefs)
+		if err != nil {
+			return sshInfo, errors.Wrap(err, "failed to get ready nodes")
 		}
-		for j := range p.Status.NodeRefs {
-			n := p.Status.NodeRefs[j]
+
+		if p.Spec.Replicas != nil && len(nodes) < int(*p.Spec.Replicas) {
+			message := fmt.Sprintf("machine pool %s/%s expected replicas %d, but only found %d ready nodes", p.Namespace, p.Name, *p.Spec.Replicas, len(nodes))
+			Log(message)
+			return sshInfo, errors.New(message)
+		}
+
+		for _, node := range nodes {
 			sshInfo = append(sshInfo, nodeSSHInfo{
 				Endpoint: cluster.Spec.ControlPlaneEndpoint.Host,
-				Hostname: n.Name,
+				Hostname: node.Name,
 				Port:     sshPort,
 			})
 		}
-
 	}

 	return sshInfo, nil
 }

+func getReadyNodes(ctx context.Context, c client.Client, refs []corev1.ObjectReference) ([]corev1.Node, error) {
+	var nodes []corev1.Node
+	for _, ref := range refs {
+		var node corev1.Node
+		if err := c.Get(ctx, client.ObjectKey{
+			Namespace: ref.Namespace,
+			Name:      ref.Name,
+		}, &node); err != nil {
+			if apierrors.IsNotFound(err) {
+				// If 404, continue. Likely the node refs have not caught up to infra providers
+				continue
+			}
+
+			return nodes, err
+		}
+
+		if !noderefutil.IsNodeReady(&node) {
+			Logf("node is not ready and won't be counted for ssh info %s/%s", node.Namespace, node.Name)
+			continue
+		}
+
+		nodes = append(nodes, node)
+	}
+
+	return nodes, nil
+}
+
+func getWorkloadClient(ctx context.Context, c client.Client, namespace, clusterName string) (client.Client, error) {
+	ctx, span := tele.Tracer().Start(ctx, "scope.MachinePoolMachineScope.getWorkloadClient")
+	defer span.End()
+
+	obj := client.ObjectKey{
+		Namespace: namespace,
+		Name:      clusterName,
+	}
+	dataBytes, err := utilkubeconfig.FromSecret(ctx, c, obj)
+	if err != nil {
+		return nil, errors.Wrapf(err, "\"%s-kubeconfig\" not found in namespace %q", obj.Name, obj.Namespace)
+	}
+
+	cfg, err := clientcmd.Load(dataBytes)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to load \"%s-kubeconfig\" in namespace %q", obj.Name, obj.Namespace)
+	}
+
+	restConfig, err := clientcmd.NewDefaultClientConfig(*cfg, &clientcmd.ConfigOverrides{}).ClientConfig()
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed transform config \"%s-kubeconfig\" in namespace %q", obj.Name, obj.Namespace)
+	}
+
+	return client.New(restConfig, client.Options{})
+}
+
 // getMachinesInCluster returns a list of all machines in the given cluster.
 // This is adapted from CAPI's test/framework/cluster_proxy.go.
 func getMachinesInCluster(ctx context.Context, c framework.Lister, namespace, name string) (*clusterv1.MachineList, error) {
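
The new getReadyNodes helper is what makes the machine-pool path tolerant of dynamic infrastructure: NodeRefs whose node objects do not exist yet are skipped, and nodes without a Ready=True condition are filtered out, so only nodes that can actually accept the SSH checks are counted. The sketch below reproduces that filtering logic against controller-runtime's fake client to show the intended behaviour; it is an illustrative test, not part of the commit, and the node names are made up.

package e2e

import (
	"context"
	"testing"

	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/cluster-api/controllers/noderefutil"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

// TestReadyNodeFiltering mirrors the getReadyNodes logic: missing node objects
// are skipped (NotFound), not-ready nodes are filtered out, ready nodes are kept.
func TestReadyNodeFiltering(t *testing.T) {
	readyNode := &corev1.Node{
		ObjectMeta: metav1.ObjectMeta{Name: "ready-node"},
		Status: corev1.NodeStatus{Conditions: []corev1.NodeCondition{
			{Type: corev1.NodeReady, Status: corev1.ConditionTrue},
		}},
	}
	notReadyNode := &corev1.Node{
		ObjectMeta: metav1.ObjectMeta{Name: "not-ready-node"},
	}
	c := fake.NewClientBuilder().WithObjects(readyNode, notReadyNode).Build()

	refs := []corev1.ObjectReference{
		{Name: "ready-node"},
		{Name: "not-ready-node"},
		{Name: "missing-node"}, // a NodeRef whose node object does not exist yet
	}

	var ready []corev1.Node
	for _, ref := range refs {
		var node corev1.Node
		if err := c.Get(context.Background(), client.ObjectKey{Name: ref.Name}, &node); err != nil {
			if apierrors.IsNotFound(err) {
				continue // infra has not caught up yet; the caller retries on the next poll
			}
			t.Fatal(err)
		}
		if !noderefutil.IsNodeReady(&node) {
			continue
		}
		ready = append(ready, node)
	}

	if len(ready) != 1 || ready[0].Name != "ready-node" {
		t.Fatalf("expected only ready-node, got %v", ready)
	}
}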
