Skip to content

Commit 9129fdb

Browse files
authored
Merge pull request #1595 from Nordix/lentzi90/e2e-common-machine-log-collection
🌱 E2e: Unify machine log collection
2 parents 864f0d8 + 9ad36cf commit 9129fdb

File tree

4 files changed

+74
-154
lines changed

4 files changed

+74
-154
lines changed

test/e2e/shared/common.go

Lines changed: 20 additions & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,13 @@ import (
2626
"os"
2727
"path"
2828
"path/filepath"
29+
"strings"
2930
"time"
3031

3132
. "github.com/onsi/ginkgo/v2"
3233
. "github.com/onsi/gomega"
3334
corev1 "k8s.io/api/core/v1"
3435
"k8s.io/apimachinery/pkg/types"
35-
"k8s.io/apimachinery/pkg/util/sets"
3636
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
3737
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
3838
"sigs.k8s.io/cluster-api/test/framework"
@@ -57,6 +57,9 @@ func SetupSpecNamespace(ctx context.Context, specName string, e2eCtx *E2EContext
5757
return namespace
5858
}
5959

60+
// DumpSpecResourcesAndCleanup dumps all the resources in the spec namespace.
61+
// This includes OpenStack resources and all the CAPI/CAPO resources in Kubernetes.
62+
// It also then cleans up the cluster object and the spec namespace itself.
6063
func DumpSpecResourcesAndCleanup(ctx context.Context, specName string, namespace *corev1.Namespace, e2eCtx *E2EContext) {
6164
Logf("Running DumpSpecResourcesAndCleanup for namespace %q", namespace.Name)
6265
// Dump all Cluster API related resources to artifacts before deleting them.
@@ -65,9 +68,6 @@ func DumpSpecResourcesAndCleanup(ctx context.Context, specName string, namespace
6568
dumpAllResources := func(directory ...string) {
6669
dumpSpecResources(ctx, e2eCtx, namespace, directory...)
6770
dumpOpenStack(ctx, e2eCtx, e2eCtx.Environment.BootstrapClusterProxy.GetName(), directory...)
68-
69-
Logf("Dumping all OpenStack server instances in the %q namespace", namespace.Name)
70-
dumpMachines(ctx, e2eCtx, namespace, directory...)
7171
}
7272

7373
dumpAllResources()
@@ -100,40 +100,8 @@ func DumpSpecResourcesAndCleanup(ctx context.Context, specName string, namespace
100100
delete(e2eCtx.Environment.Namespaces, namespace)
101101
}
102102

103-
func dumpMachines(ctx context.Context, e2eCtx *E2EContext, namespace *corev1.Namespace, directory ...string) {
104-
cluster, err := ClusterForSpec(ctx, e2eCtx, namespace)
105-
if err != nil {
106-
_, _ = fmt.Fprintf(GinkgoWriter, "cannot dump machines, couldn't get cluster in namespace %s: %v\n", namespace.Name, err)
107-
return
108-
}
109-
if cluster.Status.Bastion == nil || cluster.Status.Bastion.FloatingIP == "" {
110-
_, _ = fmt.Fprintln(GinkgoWriter, "cannot dump machines, cluster doesn't has a bastion host (yet) with a floating ip")
111-
return
112-
}
113-
machines, err := machinesForSpec(ctx, e2eCtx.Environment.BootstrapClusterProxy, namespace)
114-
if err != nil {
115-
_, _ = fmt.Fprintf(GinkgoWriter, "cannot dump machines, could not get machines: %v\n", err)
116-
return
117-
}
118-
119-
machineNames := sets.New[string]()
120-
for _, machine := range machines.Items {
121-
machineNames.Insert(machine.Name)
122-
}
123-
srvs, err := GetOpenStackServers(e2eCtx, cluster, machineNames)
124-
if err != nil {
125-
_, _ = fmt.Fprintf(GinkgoWriter, "cannot dump machines, could not get servers from OpenStack: %v\n", err)
126-
return
127-
}
128-
for _, m := range machines.Items {
129-
srv, ok := srvs[m.Name]
130-
if !ok {
131-
continue
132-
}
133-
dumpMachine(ctx, e2eCtx, m, srv, cluster.Status.Bastion.FloatingIP, directory...)
134-
}
135-
}
136-
103+
// ClusterForSpec returns the OpenStackCluster in the given namespace.
104+
// It is considered an error if more than 1 OpenStackCluster is found.
137105
func ClusterForSpec(ctx context.Context, e2eCtx *E2EContext, namespace *corev1.Namespace) (*infrav1.OpenStackCluster, error) {
138106
lister := e2eCtx.Environment.BootstrapClusterProxy.GetClient()
139107
list := new(infrav1.OpenStackClusterList)
@@ -146,88 +114,7 @@ func ClusterForSpec(ctx context.Context, e2eCtx *E2EContext, namespace *corev1.N
146114
return &list.Items[0], nil
147115
}
148116

149-
func machinesForSpec(ctx context.Context, clusterProxy framework.ClusterProxy, namespace *corev1.Namespace) (*infrav1.OpenStackMachineList, error) {
150-
list := new(infrav1.OpenStackMachineList)
151-
if err := clusterProxy.GetClient().List(ctx, list, client.InNamespace(namespace.GetName())); err != nil {
152-
return nil, fmt.Errorf("error listing machines: %v", err)
153-
}
154-
return list, nil
155-
}
156-
157-
func dumpMachine(ctx context.Context, e2eCtx *E2EContext, machine infrav1.OpenStackMachine, srv ServerExtWithIP, bastionIP string, directory ...string) {
158-
paths := append([]string{e2eCtx.Settings.ArtifactFolder, "clusters", e2eCtx.Environment.BootstrapClusterProxy.GetName()}, directory...)
159-
logPath := filepath.Join(paths...)
160-
machineLogBase := path.Join(logPath, "instances", machine.Namespace, machine.Name)
161-
metaLog := path.Join(machineLogBase, "instance.log")
162-
163-
if err := os.MkdirAll(filepath.Dir(metaLog), 0o750); err != nil {
164-
_, _ = fmt.Fprintf(GinkgoWriter, "couldn't create directory %q for file: %s\n", metaLog, err)
165-
}
166-
167-
f, err := os.OpenFile(metaLog, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
168-
if err != nil {
169-
_, _ = fmt.Fprintf(GinkgoWriter, "couldn't open file %q: %s\n", metaLog, err)
170-
return
171-
}
172-
defer f.Close()
173-
174-
serverJSON, err := json.MarshalIndent(srv, "", " ")
175-
if err != nil {
176-
_, _ = fmt.Fprintf(GinkgoWriter, "error marshalling server %v: %s", srv, err)
177-
}
178-
if err := os.WriteFile(path.Join(machineLogBase, "server.txt"), serverJSON, 0o600); err != nil {
179-
_, _ = fmt.Fprintf(GinkgoWriter, "error writing server JSON %s: %s", serverJSON, err)
180-
}
181-
182-
srvUser := e2eCtx.E2EConfig.GetVariable(SSHUserMachine)
183-
184-
_, _ = fmt.Fprintf(f, "instance found: %q\n", srv.ID)
185-
executeCommands(
186-
ctx,
187-
e2eCtx.Settings.ArtifactFolder,
188-
e2eCtx.Settings.Debug,
189-
filepath.Dir(f.Name()),
190-
srv.ip,
191-
bastionIP,
192-
srvUser,
193-
[]command{
194-
// don't do this for now, it just takes to long
195-
// {
196-
// title: "systemd",
197-
// cmd: "journalctl --no-pager --output=short-precise | grep -v 'audit:\\|audit\\['",
198-
// },
199-
{
200-
title: "kern",
201-
cmd: "journalctl --no-pager --output=short-precise -k",
202-
},
203-
{
204-
title: "containerd-info",
205-
cmd: "crictl --runtime-endpoint unix:///run/containerd/containerd.sock info",
206-
},
207-
{
208-
title: "containerd-containers",
209-
cmd: "crictl --runtime-endpoint unix:///run/containerd/containerd.sock ps",
210-
},
211-
{
212-
title: "containerd-pods",
213-
cmd: "crictl --runtime-endpoint unix:///run/containerd/containerd.sock pods",
214-
},
215-
{
216-
title: "cloud-final",
217-
cmd: "journalctl --no-pager -u cloud-final",
218-
},
219-
{
220-
title: "kubelet",
221-
cmd: "journalctl --no-pager -u kubelet.service",
222-
},
223-
{
224-
title: "containerd",
225-
cmd: "journalctl --no-pager -u containerd.service",
226-
},
227-
},
228-
)
229-
}
230-
117+
// dumpSpecResources dumps all CAPI/CAPO resources to yaml.
231118
func dumpSpecResources(ctx context.Context, e2eCtx *E2EContext, namespace *corev1.Namespace, directory ...string) {
232119
paths := append([]string{e2eCtx.Settings.ArtifactFolder, "clusters", e2eCtx.Environment.BootstrapClusterProxy.GetName(), "resources"}, directory...)
233120
framework.DumpAllResources(ctx, framework.DumpAllResourcesInput{
@@ -287,26 +174,23 @@ func getOpenStackClusterFromMachine(ctx context.Context, client client.Client, m
287174
return openStackCluster, err
288175
}
289176

177+
// getIDFromProviderID returns the server ID part of a provider ID string.
178+
func getIDFromProviderID(providerID string) string {
179+
return strings.TrimPrefix(providerID, "openstack:///")
180+
}
181+
290182
type OpenStackLogCollector struct {
291-
E2EContext E2EContext
183+
E2EContext *E2EContext
292184
}
293185

294186
// CollectMachineLog gets logs for the OpenStack resources related to the given machine.
295187
func (o OpenStackLogCollector) CollectMachineLog(ctx context.Context, managementClusterClient client.Client, m *clusterv1.Machine, outputPath string) error {
296-
machineLogBase := path.Join(outputPath, "instances", m.Namespace, m.Name)
297-
metaLog := path.Join(machineLogBase, "instance.log")
188+
Logf("Collecting logs for machine %q and storing them in %q", m.ObjectMeta.Name, outputPath)
298189

299-
if err := os.MkdirAll(filepath.Dir(metaLog), 0o750); err != nil {
300-
_, _ = fmt.Fprintf(GinkgoWriter, "couldn't create directory %q for file: %s\n", metaLog, err)
190+
if err := os.MkdirAll(outputPath, 0o750); err != nil {
191+
return fmt.Errorf("couldn't create directory %q for logs: %s", outputPath, err)
301192
}
302193

303-
f, err := os.OpenFile(metaLog, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
304-
if err != nil {
305-
_, _ = fmt.Fprintf(GinkgoWriter, "couldn't open file %q: %s\n", metaLog, err)
306-
return nil
307-
}
308-
defer f.Close()
309-
310194
openStackCluster, err := getOpenStackClusterFromMachine(ctx, managementClusterClient, m)
311195
if err != nil {
312196
return fmt.Errorf("error getting OpenStackCluster for Machine: %s", err)
@@ -317,30 +201,25 @@ func (o OpenStackLogCollector) CollectMachineLog(ctx context.Context, management
317201
}
318202
ip := m.Status.Addresses[0].Address
319203

320-
srvs, err := GetOpenStackServers(&o.E2EContext, openStackCluster, sets.New(m.Spec.InfrastructureRef.Name))
204+
srv, err := GetOpenStackServerWithIP(o.E2EContext, getIDFromProviderID(*m.Spec.ProviderID), openStackCluster)
321205
if err != nil {
322-
return fmt.Errorf("cannot dump machines, could not get servers from OpenStack: %v", err)
323-
}
324-
if len(srvs) != 1 {
325-
return fmt.Errorf("expected exactly 1 server but got %d", len(srvs))
206+
return fmt.Errorf("error getting OpenStack server: %w", err)
326207
}
327-
srv := srvs[m.Spec.InfrastructureRef.Name]
328208

329209
serverJSON, err := json.MarshalIndent(srv, "", " ")
330210
if err != nil {
331211
return fmt.Errorf("error marshalling server %v: %s", srv, err)
332212
}
333-
if err := os.WriteFile(path.Join(machineLogBase, "server.txt"), serverJSON, 0o600); err != nil {
213+
if err := os.WriteFile(path.Join(outputPath, "server.txt"), serverJSON, 0o600); err != nil {
334214
return fmt.Errorf("error writing server JSON %s: %s", serverJSON, err)
335215
}
336-
_, _ = fmt.Fprintf(f, "instance found: %q\n", srv.ID)
337216

338217
srvUser := o.E2EContext.E2EConfig.GetVariable(SSHUserMachine)
339218
executeCommands(
340219
ctx,
341220
o.E2EContext.Settings.ArtifactFolder,
342221
o.E2EContext.Settings.Debug,
343-
filepath.Dir(f.Name()),
222+
outputPath,
344223
ip,
345224
openStackCluster.Status.Bastion.FloatingIP,
346225
srvUser,

test/e2e/shared/openstack.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,41 @@ func GetOpenStackServers(e2eCtx *E2EContext, openStackCluster *infrav1.OpenStack
485485
return srvs, nil
486486
}
487487

488+
// GetOpenStackServerWithIP returns the server with the given ID along with the first
489+
// IP address it has in the OpenStackCluster network.
490+
func GetOpenStackServerWithIP(e2eCtx *E2EContext, id string, openStackCluster *infrav1.OpenStackCluster) (ServerExtWithIP, error) {
491+
srvExtWithIP := ServerExtWithIP{}
492+
providerClient, clientOpts, _, err := GetTenantProviderClient(e2eCtx)
493+
if err != nil {
494+
_, _ = fmt.Fprintf(GinkgoWriter, "error creating provider client: %s\n", err)
495+
return srvExtWithIP, nil
496+
}
497+
498+
computeClient, err := clients.NewComputeClient(providerClient, clientOpts)
499+
if err != nil {
500+
return srvExtWithIP, fmt.Errorf("unable to create compute client: %w", err)
501+
}
502+
srvExt, err := computeClient.GetServer(id)
503+
if err != nil {
504+
return srvExtWithIP, fmt.Errorf("unable to get server: %w", err)
505+
}
506+
srvExtWithIP.ServerExt = *srvExt
507+
508+
instanceStatus := compute.NewInstanceStatusFromServer(srvExt, logr.Discard())
509+
instanceNS, err := instanceStatus.NetworkStatus()
510+
if err != nil {
511+
return srvExtWithIP, fmt.Errorf("error getting network status for server %s: %v", srvExt.Name, err)
512+
}
513+
514+
ip := instanceNS.IP(openStackCluster.Status.Network.Name)
515+
if ip == "" {
516+
return srvExtWithIP, fmt.Errorf("error getting internal ip for server %s: internal ip doesn't exist (yet)", srvExt.Name)
517+
}
518+
srvExtWithIP.ip = ip
519+
520+
return srvExtWithIP, nil
521+
}
522+
488523
func GetTenantProviderClient(e2eCtx *E2EContext) (*gophercloud.ProviderClient, *clientconfig.ClientOpts, *string, error) {
489524
openstackCloud := e2eCtx.E2EConfig.GetVariable(OpenStackCloud)
490525
return getProviderClient(e2eCtx, openstackCloud)

test/e2e/shared/suite.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ func AllNodesBeforeSuite(e2eCtx *E2EContext, data []byte) {
182182
e2eCtx.Settings.ArtifactFolder = conf.ArtifactFolder
183183
e2eCtx.Settings.ConfigPath = conf.ConfigPath
184184
e2eCtx.Environment.ClusterctlConfigPath = conf.ClusterctlConfigPath
185-
withLogCollector := framework.WithMachineLogCollector(OpenStackLogCollector{E2EContext: *e2eCtx})
185+
withLogCollector := framework.WithMachineLogCollector(OpenStackLogCollector{E2EContext: e2eCtx})
186186
e2eCtx.Environment.BootstrapClusterProxy = framework.NewClusterProxy("bootstrap", conf.KubeconfigPath, e2eCtx.Environment.Scheme, withLogCollector)
187187
e2eCtx.E2EConfig = &conf.E2EConfig
188188
e2eCtx.Settings.KubetestConfigFilePath = conf.KubetestConfigFilePath

test/e2e/suites/e2e/e2e_test.go

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,9 @@ const specName = "e2e"
6565

6666
var _ = Describe("e2e tests [PR-Blocking]", func() {
6767
var (
68-
namespace *corev1.Namespace
69-
ctx context.Context
68+
namespace *corev1.Namespace
69+
clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult
70+
ctx context.Context
7071

7172
// Cleanup functions which cannot run until after the cluster has been deleted
7273
postClusterCleanup []func()
@@ -77,6 +78,7 @@ var _ = Describe("e2e tests [PR-Blocking]", func() {
7778
ctx = context.TODO()
7879
// Setup a Namespace where to host objects for this spec and create a watcher for the namespace events.
7980
namespace = shared.SetupSpecNamespace(ctx, specName, e2eCtx)
81+
clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult)
8082
Expect(e2eCtx.E2EConfig).ToNot(BeNil(), "Invalid argument. e2eConfig can't be nil when calling %s spec", specName)
8183
Expect(e2eCtx.E2EConfig.Variables).To(HaveKey(shared.KubernetesVersion))
8284
shared.SetEnvVar("USE_CI_ARTIFACTS", "true", false)
@@ -91,7 +93,8 @@ var _ = Describe("e2e tests [PR-Blocking]", func() {
9193
configCluster.ControlPlaneMachineCount = pointer.Int64(3)
9294
configCluster.WorkerMachineCount = pointer.Int64(1)
9395
configCluster.Flavor = shared.FlavorDefault
94-
md := createCluster(ctx, configCluster)
96+
createCluster(ctx, configCluster, clusterResources)
97+
md := clusterResources.MachineDeployments
9598

9699
workerMachines := framework.GetMachinesByMachineDeployments(ctx, framework.GetMachinesByMachineDeploymentsInput{
97100
Lister: e2eCtx.Environment.BootstrapClusterProxy.GetClient(),
@@ -169,7 +172,8 @@ var _ = Describe("e2e tests [PR-Blocking]", func() {
169172
configCluster.ControlPlaneMachineCount = pointer.Int64(3)
170173
configCluster.WorkerMachineCount = pointer.Int64(1)
171174
configCluster.Flavor = shared.FlavorFlatcar
172-
md := createCluster(ctx, configCluster)
175+
createCluster(ctx, configCluster, clusterResources)
176+
md := clusterResources.MachineDeployments
173177

174178
workerMachines := framework.GetMachinesByMachineDeployments(ctx, framework.GetMachinesByMachineDeploymentsInput{
175179
Lister: e2eCtx.Environment.BootstrapClusterProxy.GetClient(),
@@ -210,7 +214,8 @@ var _ = Describe("e2e tests [PR-Blocking]", func() {
210214
configCluster.ControlPlaneMachineCount = pointer.Int64(1)
211215
configCluster.WorkerMachineCount = pointer.Int64(1)
212216
configCluster.Flavor = shared.FlavorWithoutLB
213-
md := createCluster(ctx, configCluster)
217+
createCluster(ctx, configCluster, clusterResources)
218+
md := clusterResources.MachineDeployments
214219

215220
workerMachines := framework.GetMachinesByMachineDeployments(ctx, framework.GetMachinesByMachineDeploymentsInput{
216221
Lister: e2eCtx.Environment.BootstrapClusterProxy.GetClient(),
@@ -343,7 +348,8 @@ var _ = Describe("e2e tests [PR-Blocking]", func() {
343348
configCluster.ControlPlaneMachineCount = pointer.Int64(1)
344349
configCluster.WorkerMachineCount = pointer.Int64(1)
345350
configCluster.Flavor = shared.FlavorMultiNetwork
346-
md = createCluster(ctx, configCluster)
351+
createCluster(ctx, configCluster, clusterResources)
352+
md = clusterResources.MachineDeployments
347353
})
348354

349355
It("should attach all machines to multiple networks", func() {
@@ -421,7 +427,7 @@ var _ = Describe("e2e tests [PR-Blocking]", func() {
421427
configCluster.ControlPlaneMachineCount = pointer.Int64(1)
422428
configCluster.WorkerMachineCount = pointer.Int64(0)
423429
configCluster.Flavor = shared.FlavorWithoutLB
424-
_ = createCluster(ctx, configCluster)
430+
createCluster(ctx, configCluster, clusterResources)
425431

426432
shared.Logf("Creating Machine Deployment in an invalid Availability Zone")
427433
mdInvalidAZName := clusterName + "-md-invalid-az"
@@ -474,7 +480,8 @@ var _ = Describe("e2e tests [PR-Blocking]", func() {
474480
configCluster.ControlPlaneMachineCount = pointer.Int64(3)
475481
configCluster.WorkerMachineCount = pointer.Int64(2)
476482
configCluster.Flavor = shared.FlavorMultiAZ
477-
md = createCluster(ctx, configCluster)
483+
createCluster(ctx, configCluster, clusterResources)
484+
md = clusterResources.MachineDeployments
478485

479486
var err error
480487
cluster, err = shared.ClusterForSpec(ctx, e2eCtx, namespace)
@@ -595,6 +602,8 @@ var _ = Describe("e2e tests [PR-Blocking]", func() {
595602

596603
AfterEach(func() {
597604
shared.SetEnvVar("USE_CI_ARTIFACTS", "false", false)
605+
shared.Logf("Attempting to collect logs for cluster %q in namespace %q", clusterResources.Cluster.Name, namespace.Name)
606+
e2eCtx.Environment.BootstrapClusterProxy.CollectWorkloadClusterLogs(ctx, namespace.Name, clusterResources.Cluster.Name, filepath.Join(e2eCtx.Settings.ArtifactFolder, "clusters", e2eCtx.Environment.BootstrapClusterProxy.GetName(), namespace.Name))
598607
// Dumps all the resources in the spec namespace, then cleanups the cluster object and the spec namespace itself.
599608
shared.DumpSpecResourcesAndCleanup(ctx, specName, namespace, e2eCtx)
600609

@@ -605,17 +614,14 @@ var _ = Describe("e2e tests [PR-Blocking]", func() {
605614
})
606615
})
607616

608-
func createCluster(ctx context.Context, configCluster clusterctl.ConfigClusterInput) []*clusterv1.MachineDeployment {
609-
result := &clusterctl.ApplyClusterTemplateAndWaitResult{}
617+
func createCluster(ctx context.Context, configCluster clusterctl.ConfigClusterInput, result *clusterctl.ApplyClusterTemplateAndWaitResult) {
610618
clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{
611619
ClusterProxy: e2eCtx.Environment.BootstrapClusterProxy,
612620
ConfigCluster: configCluster,
613621
WaitForClusterIntervals: e2eCtx.E2EConfig.GetIntervals(specName, "wait-cluster"),
614622
WaitForControlPlaneIntervals: e2eCtx.E2EConfig.GetIntervals(specName, "wait-control-plane"),
615623
WaitForMachineDeployments: e2eCtx.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
616624
}, result)
617-
618-
return result.MachineDeployments
619625
}
620626

621627
func defaultConfigCluster(clusterName, namespace string) clusterctl.ConfigClusterInput {

0 commit comments

Comments
 (0)