Skip to content

Commit 9ad36cf

Browse files
committed
E2e: Unify machine log collection
This unifies the e2e log collection for machines so that it uses the LogCollector interface. It removes the instance.log which was just showing the ID of the openstack server and simplifies the code a bit.
1 parent facb1f0 commit 9ad36cf

File tree

4 files changed

+74
-154
lines changed

4 files changed

+74
-154
lines changed

test/e2e/shared/common.go

Lines changed: 20 additions & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,13 @@ import (
2626
"os"
2727
"path"
2828
"path/filepath"
29+
"strings"
2930
"time"
3031

3132
. "github.com/onsi/ginkgo/v2"
3233
. "github.com/onsi/gomega"
3334
corev1 "k8s.io/api/core/v1"
3435
"k8s.io/apimachinery/pkg/types"
35-
"k8s.io/apimachinery/pkg/util/sets"
3636
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
3737
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
3838
"sigs.k8s.io/cluster-api/test/framework"
@@ -57,6 +57,9 @@ func SetupSpecNamespace(ctx context.Context, specName string, e2eCtx *E2EContext
5757
return namespace
5858
}
5959

60+
// DumpSpecResourcesAndCleanup dumps all the resources in the spec namespace.
61+
// This includes OpenStack resources and all the CAPI/CAPO resources in Kubernetes.
62+
// It then cleans up the cluster object and the spec namespace itself.
6063
func DumpSpecResourcesAndCleanup(ctx context.Context, specName string, namespace *corev1.Namespace, e2eCtx *E2EContext) {
6164
Logf("Running DumpSpecResourcesAndCleanup for namespace %q", namespace.Name)
6265
// Dump all Cluster API related resources to artifacts before deleting them.
@@ -65,9 +68,6 @@ func DumpSpecResourcesAndCleanup(ctx context.Context, specName string, namespace
6568
dumpAllResources := func(directory ...string) {
6669
dumpSpecResources(ctx, e2eCtx, namespace, directory...)
6770
dumpOpenStack(ctx, e2eCtx, e2eCtx.Environment.BootstrapClusterProxy.GetName(), directory...)
68-
69-
Logf("Dumping all OpenStack server instances in the %q namespace", namespace.Name)
70-
dumpMachines(ctx, e2eCtx, namespace, directory...)
7171
}
7272

7373
dumpAllResources()
@@ -100,40 +100,8 @@ func DumpSpecResourcesAndCleanup(ctx context.Context, specName string, namespace
100100
delete(e2eCtx.Environment.Namespaces, namespace)
101101
}
102102

103-
func dumpMachines(ctx context.Context, e2eCtx *E2EContext, namespace *corev1.Namespace, directory ...string) {
104-
cluster, err := ClusterForSpec(ctx, e2eCtx, namespace)
105-
if err != nil {
106-
_, _ = fmt.Fprintf(GinkgoWriter, "cannot dump machines, couldn't get cluster in namespace %s: %v\n", namespace.Name, err)
107-
return
108-
}
109-
if cluster.Status.Bastion == nil || cluster.Status.Bastion.FloatingIP == "" {
110-
_, _ = fmt.Fprintln(GinkgoWriter, "cannot dump machines, cluster doesn't have a bastion host (yet) with a floating ip")
111-
return
112-
}
113-
machines, err := machinesForSpec(ctx, e2eCtx.Environment.BootstrapClusterProxy, namespace)
114-
if err != nil {
115-
_, _ = fmt.Fprintf(GinkgoWriter, "cannot dump machines, could not get machines: %v\n", err)
116-
return
117-
}
118-
119-
machineNames := sets.New[string]()
120-
for _, machine := range machines.Items {
121-
machineNames.Insert(machine.Name)
122-
}
123-
srvs, err := GetOpenStackServers(e2eCtx, cluster, machineNames)
124-
if err != nil {
125-
_, _ = fmt.Fprintf(GinkgoWriter, "cannot dump machines, could not get servers from OpenStack: %v\n", err)
126-
return
127-
}
128-
for _, m := range machines.Items {
129-
srv, ok := srvs[m.Name]
130-
if !ok {
131-
continue
132-
}
133-
dumpMachine(ctx, e2eCtx, m, srv, cluster.Status.Bastion.FloatingIP, directory...)
134-
}
135-
}
136-
103+
// ClusterForSpec returns the OpenStackCluster in the given namespace.
104+
// It is considered an error if more than 1 OpenStackCluster is found.
137105
func ClusterForSpec(ctx context.Context, e2eCtx *E2EContext, namespace *corev1.Namespace) (*infrav1.OpenStackCluster, error) {
138106
lister := e2eCtx.Environment.BootstrapClusterProxy.GetClient()
139107
list := new(infrav1.OpenStackClusterList)
@@ -146,88 +114,7 @@ func ClusterForSpec(ctx context.Context, e2eCtx *E2EContext, namespace *corev1.N
146114
return &list.Items[0], nil
147115
}
148116

149-
func machinesForSpec(ctx context.Context, clusterProxy framework.ClusterProxy, namespace *corev1.Namespace) (*infrav1.OpenStackMachineList, error) {
150-
list := new(infrav1.OpenStackMachineList)
151-
if err := clusterProxy.GetClient().List(ctx, list, client.InNamespace(namespace.GetName())); err != nil {
152-
return nil, fmt.Errorf("error listing machines: %v", err)
153-
}
154-
return list, nil
155-
}
156-
157-
func dumpMachine(ctx context.Context, e2eCtx *E2EContext, machine infrav1.OpenStackMachine, srv ServerExtWithIP, bastionIP string, directory ...string) {
158-
paths := append([]string{e2eCtx.Settings.ArtifactFolder, "clusters", e2eCtx.Environment.BootstrapClusterProxy.GetName()}, directory...)
159-
logPath := filepath.Join(paths...)
160-
machineLogBase := path.Join(logPath, "instances", machine.Namespace, machine.Name)
161-
metaLog := path.Join(machineLogBase, "instance.log")
162-
163-
if err := os.MkdirAll(filepath.Dir(metaLog), 0o750); err != nil {
164-
_, _ = fmt.Fprintf(GinkgoWriter, "couldn't create directory %q for file: %s\n", metaLog, err)
165-
}
166-
167-
f, err := os.OpenFile(metaLog, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
168-
if err != nil {
169-
_, _ = fmt.Fprintf(GinkgoWriter, "couldn't open file %q: %s\n", metaLog, err)
170-
return
171-
}
172-
defer f.Close()
173-
174-
serverJSON, err := json.MarshalIndent(srv, "", " ")
175-
if err != nil {
176-
_, _ = fmt.Fprintf(GinkgoWriter, "error marshalling server %v: %s", srv, err)
177-
}
178-
if err := os.WriteFile(path.Join(machineLogBase, "server.txt"), serverJSON, 0o600); err != nil {
179-
_, _ = fmt.Fprintf(GinkgoWriter, "error writing server JSON %s: %s", serverJSON, err)
180-
}
181-
182-
srvUser := e2eCtx.E2EConfig.GetVariable(SSHUserMachine)
183-
184-
_, _ = fmt.Fprintf(f, "instance found: %q\n", srv.ID)
185-
executeCommands(
186-
ctx,
187-
e2eCtx.Settings.ArtifactFolder,
188-
e2eCtx.Settings.Debug,
189-
filepath.Dir(f.Name()),
190-
srv.ip,
191-
bastionIP,
192-
srvUser,
193-
[]command{
194-
// don't do this for now, it just takes too long
195-
// {
196-
// title: "systemd",
197-
// cmd: "journalctl --no-pager --output=short-precise | grep -v 'audit:\\|audit\\['",
198-
// },
199-
{
200-
title: "kern",
201-
cmd: "journalctl --no-pager --output=short-precise -k",
202-
},
203-
{
204-
title: "containerd-info",
205-
cmd: "crictl --runtime-endpoint unix:///run/containerd/containerd.sock info",
206-
},
207-
{
208-
title: "containerd-containers",
209-
cmd: "crictl --runtime-endpoint unix:///run/containerd/containerd.sock ps",
210-
},
211-
{
212-
title: "containerd-pods",
213-
cmd: "crictl --runtime-endpoint unix:///run/containerd/containerd.sock pods",
214-
},
215-
{
216-
title: "cloud-final",
217-
cmd: "journalctl --no-pager -u cloud-final",
218-
},
219-
{
220-
title: "kubelet",
221-
cmd: "journalctl --no-pager -u kubelet.service",
222-
},
223-
{
224-
title: "containerd",
225-
cmd: "journalctl --no-pager -u containerd.service",
226-
},
227-
},
228-
)
229-
}
230-
117+
// dumpSpecResources dumps all CAPI/CAPO resources to yaml.
231118
func dumpSpecResources(ctx context.Context, e2eCtx *E2EContext, namespace *corev1.Namespace, directory ...string) {
232119
paths := append([]string{e2eCtx.Settings.ArtifactFolder, "clusters", e2eCtx.Environment.BootstrapClusterProxy.GetName(), "resources"}, directory...)
233120
framework.DumpAllResources(ctx, framework.DumpAllResourcesInput{
@@ -287,26 +174,23 @@ func getOpenStackClusterFromMachine(ctx context.Context, client client.Client, m
287174
return openStackCluster, err
288175
}
289176

177+
// getIDFromProviderID returns the server ID part of a provider ID string.
178+
func getIDFromProviderID(providerID string) string {
179+
return strings.TrimPrefix(providerID, "openstack:///")
180+
}
181+
290182
type OpenStackLogCollector struct {
291-
E2EContext E2EContext
183+
E2EContext *E2EContext
292184
}
293185

294186
// CollectMachineLog gets logs for the OpenStack resources related to the given machine.
295187
func (o OpenStackLogCollector) CollectMachineLog(ctx context.Context, managementClusterClient client.Client, m *clusterv1.Machine, outputPath string) error {
296-
machineLogBase := path.Join(outputPath, "instances", m.Namespace, m.Name)
297-
metaLog := path.Join(machineLogBase, "instance.log")
188+
Logf("Collecting logs for machine %q and storing them in %q", m.ObjectMeta.Name, outputPath)
298189

299-
if err := os.MkdirAll(filepath.Dir(metaLog), 0o750); err != nil {
300-
_, _ = fmt.Fprintf(GinkgoWriter, "couldn't create directory %q for file: %s\n", metaLog, err)
190+
if err := os.MkdirAll(outputPath, 0o750); err != nil {
191+
return fmt.Errorf("couldn't create directory %q for logs: %s", outputPath, err)
301192
}
302193

303-
f, err := os.OpenFile(metaLog, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
304-
if err != nil {
305-
_, _ = fmt.Fprintf(GinkgoWriter, "couldn't open file %q: %s\n", metaLog, err)
306-
return nil
307-
}
308-
defer f.Close()
309-
310194
openStackCluster, err := getOpenStackClusterFromMachine(ctx, managementClusterClient, m)
311195
if err != nil {
312196
return fmt.Errorf("error getting OpenStackCluster for Machine: %s", err)
@@ -317,30 +201,25 @@ func (o OpenStackLogCollector) CollectMachineLog(ctx context.Context, management
317201
}
318202
ip := m.Status.Addresses[0].Address
319203

320-
srvs, err := GetOpenStackServers(&o.E2EContext, openStackCluster, sets.New(m.Spec.InfrastructureRef.Name))
204+
srv, err := GetOpenStackServerWithIP(o.E2EContext, getIDFromProviderID(*m.Spec.ProviderID), openStackCluster)
321205
if err != nil {
322-
return fmt.Errorf("cannot dump machines, could not get servers from OpenStack: %v", err)
323-
}
324-
if len(srvs) != 1 {
325-
return fmt.Errorf("expected exactly 1 server but got %d", len(srvs))
206+
return fmt.Errorf("error getting OpenStack server: %w", err)
326207
}
327-
srv := srvs[m.Spec.InfrastructureRef.Name]
328208

329209
serverJSON, err := json.MarshalIndent(srv, "", " ")
330210
if err != nil {
331211
return fmt.Errorf("error marshalling server %v: %s", srv, err)
332212
}
333-
if err := os.WriteFile(path.Join(machineLogBase, "server.txt"), serverJSON, 0o600); err != nil {
213+
if err := os.WriteFile(path.Join(outputPath, "server.txt"), serverJSON, 0o600); err != nil {
334214
return fmt.Errorf("error writing server JSON %s: %s", serverJSON, err)
335215
}
336-
_, _ = fmt.Fprintf(f, "instance found: %q\n", srv.ID)
337216

338217
srvUser := o.E2EContext.E2EConfig.GetVariable(SSHUserMachine)
339218
executeCommands(
340219
ctx,
341220
o.E2EContext.Settings.ArtifactFolder,
342221
o.E2EContext.Settings.Debug,
343-
filepath.Dir(f.Name()),
222+
outputPath,
344223
ip,
345224
openStackCluster.Status.Bastion.FloatingIP,
346225
srvUser,

test/e2e/shared/openstack.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,41 @@ func GetOpenStackServers(e2eCtx *E2EContext, openStackCluster *infrav1.OpenStack
485485
return srvs, nil
486486
}
487487

488+
// GetOpenStackServer returns the server with the given ID along with the first
489+
// IP address it has in the OpenStackCluster network.
490+
func GetOpenStackServerWithIP(e2eCtx *E2EContext, id string, openStackCluster *infrav1.OpenStackCluster) (ServerExtWithIP, error) {
491+
srvExtWithIP := ServerExtWithIP{}
492+
providerClient, clientOpts, _, err := GetTenantProviderClient(e2eCtx)
493+
if err != nil {
494+
_, _ = fmt.Fprintf(GinkgoWriter, "error creating provider client: %s\n", err)
495+
return srvExtWithIP, nil
496+
}
497+
498+
computeClient, err := clients.NewComputeClient(providerClient, clientOpts)
499+
if err != nil {
500+
return srvExtWithIP, fmt.Errorf("unable to create compute client: %w", err)
501+
}
502+
srvExt, err := computeClient.GetServer(id)
503+
if err != nil {
504+
return srvExtWithIP, fmt.Errorf("unable to get server: %w", err)
505+
}
506+
srvExtWithIP.ServerExt = *srvExt
507+
508+
instanceStatus := compute.NewInstanceStatusFromServer(srvExt, logr.Discard())
509+
instanceNS, err := instanceStatus.NetworkStatus()
510+
if err != nil {
511+
return srvExtWithIP, fmt.Errorf("error getting network status for server %s: %v", srvExt.Name, err)
512+
}
513+
514+
ip := instanceNS.IP(openStackCluster.Status.Network.Name)
515+
if ip == "" {
516+
return srvExtWithIP, fmt.Errorf("error getting internal ip for server %s: internal ip doesn't exist (yet)", srvExt.Name)
517+
}
518+
srvExtWithIP.ip = ip
519+
520+
return srvExtWithIP, nil
521+
}
522+
488523
func GetTenantProviderClient(e2eCtx *E2EContext) (*gophercloud.ProviderClient, *clientconfig.ClientOpts, *string, error) {
489524
openstackCloud := e2eCtx.E2EConfig.GetVariable(OpenStackCloud)
490525
return getProviderClient(e2eCtx, openstackCloud)

test/e2e/shared/suite.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ func AllNodesBeforeSuite(e2eCtx *E2EContext, data []byte) {
182182
e2eCtx.Settings.ArtifactFolder = conf.ArtifactFolder
183183
e2eCtx.Settings.ConfigPath = conf.ConfigPath
184184
e2eCtx.Environment.ClusterctlConfigPath = conf.ClusterctlConfigPath
185-
withLogCollector := framework.WithMachineLogCollector(OpenStackLogCollector{E2EContext: *e2eCtx})
185+
withLogCollector := framework.WithMachineLogCollector(OpenStackLogCollector{E2EContext: e2eCtx})
186186
e2eCtx.Environment.BootstrapClusterProxy = framework.NewClusterProxy("bootstrap", conf.KubeconfigPath, e2eCtx.Environment.Scheme, withLogCollector)
187187
e2eCtx.E2EConfig = &conf.E2EConfig
188188
e2eCtx.Settings.KubetestConfigFilePath = conf.KubetestConfigFilePath

test/e2e/suites/e2e/e2e_test.go

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,9 @@ const specName = "e2e"
6565

6666
var _ = Describe("e2e tests [PR-Blocking]", func() {
6767
var (
68-
namespace *corev1.Namespace
69-
ctx context.Context
68+
namespace *corev1.Namespace
69+
clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult
70+
ctx context.Context
7071

7172
// Cleanup functions which cannot run until after the cluster has been deleted
7273
postClusterCleanup []func()
@@ -77,6 +78,7 @@ var _ = Describe("e2e tests [PR-Blocking]", func() {
7778
ctx = context.TODO()
7879
// Setup a Namespace where to host objects for this spec and create a watcher for the namespace events.
7980
namespace = shared.SetupSpecNamespace(ctx, specName, e2eCtx)
81+
clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult)
8082
Expect(e2eCtx.E2EConfig).ToNot(BeNil(), "Invalid argument. e2eConfig can't be nil when calling %s spec", specName)
8183
Expect(e2eCtx.E2EConfig.Variables).To(HaveKey(shared.KubernetesVersion))
8284
shared.SetEnvVar("USE_CI_ARTIFACTS", "true", false)
@@ -91,7 +93,8 @@ var _ = Describe("e2e tests [PR-Blocking]", func() {
9193
configCluster.ControlPlaneMachineCount = pointer.Int64(3)
9294
configCluster.WorkerMachineCount = pointer.Int64(1)
9395
configCluster.Flavor = shared.FlavorDefault
94-
md := createCluster(ctx, configCluster)
96+
createCluster(ctx, configCluster, clusterResources)
97+
md := clusterResources.MachineDeployments
9598

9699
workerMachines := framework.GetMachinesByMachineDeployments(ctx, framework.GetMachinesByMachineDeploymentsInput{
97100
Lister: e2eCtx.Environment.BootstrapClusterProxy.GetClient(),
@@ -169,7 +172,8 @@ var _ = Describe("e2e tests [PR-Blocking]", func() {
169172
configCluster.ControlPlaneMachineCount = pointer.Int64(3)
170173
configCluster.WorkerMachineCount = pointer.Int64(1)
171174
configCluster.Flavor = shared.FlavorFlatcar
172-
md := createCluster(ctx, configCluster)
175+
createCluster(ctx, configCluster, clusterResources)
176+
md := clusterResources.MachineDeployments
173177

174178
workerMachines := framework.GetMachinesByMachineDeployments(ctx, framework.GetMachinesByMachineDeploymentsInput{
175179
Lister: e2eCtx.Environment.BootstrapClusterProxy.GetClient(),
@@ -210,7 +214,8 @@ var _ = Describe("e2e tests [PR-Blocking]", func() {
210214
configCluster.ControlPlaneMachineCount = pointer.Int64(1)
211215
configCluster.WorkerMachineCount = pointer.Int64(1)
212216
configCluster.Flavor = shared.FlavorWithoutLB
213-
md := createCluster(ctx, configCluster)
217+
createCluster(ctx, configCluster, clusterResources)
218+
md := clusterResources.MachineDeployments
214219

215220
workerMachines := framework.GetMachinesByMachineDeployments(ctx, framework.GetMachinesByMachineDeploymentsInput{
216221
Lister: e2eCtx.Environment.BootstrapClusterProxy.GetClient(),
@@ -343,7 +348,8 @@ var _ = Describe("e2e tests [PR-Blocking]", func() {
343348
configCluster.ControlPlaneMachineCount = pointer.Int64(1)
344349
configCluster.WorkerMachineCount = pointer.Int64(1)
345350
configCluster.Flavor = shared.FlavorMultiNetwork
346-
md = createCluster(ctx, configCluster)
351+
createCluster(ctx, configCluster, clusterResources)
352+
md = clusterResources.MachineDeployments
347353
})
348354

349355
It("should attach all machines to multiple networks", func() {
@@ -421,7 +427,7 @@ var _ = Describe("e2e tests [PR-Blocking]", func() {
421427
configCluster.ControlPlaneMachineCount = pointer.Int64(1)
422428
configCluster.WorkerMachineCount = pointer.Int64(0)
423429
configCluster.Flavor = shared.FlavorWithoutLB
424-
_ = createCluster(ctx, configCluster)
430+
createCluster(ctx, configCluster, clusterResources)
425431

426432
shared.Logf("Creating Machine Deployment in an invalid Availability Zone")
427433
mdInvalidAZName := clusterName + "-md-invalid-az"
@@ -474,7 +480,8 @@ var _ = Describe("e2e tests [PR-Blocking]", func() {
474480
configCluster.ControlPlaneMachineCount = pointer.Int64(3)
475481
configCluster.WorkerMachineCount = pointer.Int64(2)
476482
configCluster.Flavor = shared.FlavorMultiAZ
477-
md = createCluster(ctx, configCluster)
483+
createCluster(ctx, configCluster, clusterResources)
484+
md = clusterResources.MachineDeployments
478485

479486
var err error
480487
cluster, err = shared.ClusterForSpec(ctx, e2eCtx, namespace)
@@ -595,6 +602,8 @@ var _ = Describe("e2e tests [PR-Blocking]", func() {
595602

596603
AfterEach(func() {
597604
shared.SetEnvVar("USE_CI_ARTIFACTS", "false", false)
605+
shared.Logf("Attempting to collect logs for cluster %q in namespace %q", clusterResources.Cluster.Name, namespace.Name)
606+
e2eCtx.Environment.BootstrapClusterProxy.CollectWorkloadClusterLogs(ctx, namespace.Name, clusterResources.Cluster.Name, filepath.Join(e2eCtx.Settings.ArtifactFolder, "clusters", e2eCtx.Environment.BootstrapClusterProxy.GetName(), namespace.Name))
598607
// Dumps all the resources in the spec namespace, then cleans up the cluster object and the spec namespace itself.
599608
shared.DumpSpecResourcesAndCleanup(ctx, specName, namespace, e2eCtx)
600609

@@ -605,17 +614,14 @@ var _ = Describe("e2e tests [PR-Blocking]", func() {
605614
})
606615
})
607616

608-
func createCluster(ctx context.Context, configCluster clusterctl.ConfigClusterInput) []*clusterv1.MachineDeployment {
609-
result := &clusterctl.ApplyClusterTemplateAndWaitResult{}
617+
func createCluster(ctx context.Context, configCluster clusterctl.ConfigClusterInput, result *clusterctl.ApplyClusterTemplateAndWaitResult) {
610618
clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{
611619
ClusterProxy: e2eCtx.Environment.BootstrapClusterProxy,
612620
ConfigCluster: configCluster,
613621
WaitForClusterIntervals: e2eCtx.E2EConfig.GetIntervals(specName, "wait-cluster"),
614622
WaitForControlPlaneIntervals: e2eCtx.E2EConfig.GetIntervals(specName, "wait-control-plane"),
615623
WaitForMachineDeployments: e2eCtx.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
616624
}, result)
617-
618-
return result.MachineDeployments
619625
}
620626

621627
func defaultConfigCluster(clusterName, namespace string) clusterctl.ConfigClusterInput {

0 commit comments

Comments
 (0)