From 6f7602982b5f683417ec6ae690957026f9389204 Mon Sep 17 00:00:00 2001
From: Claudiu Belu
Date: Tue, 28 Jan 2025 17:43:37 +0000
Subject: [PATCH 1/2] Leave only cluster scaling and refresh

Run test multiple times
---
 .github/workflows/e2e.yaml | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml
index 714723fd..d80ca95d 100644
--- a/.github/workflows/e2e.yaml
+++ b/.github/workflows/e2e.yaml
@@ -60,13 +60,26 @@ jobs:
     strategy:
       matrix:
         ginkgo_focus:
-          - "KCP remediation"
-          - "MachineDeployment remediation"
-          - "Workload cluster creation"
-          - "Workload cluster scaling"
-          - "Workload cluster upgrade"
           - "Certificate Refresh"
-          - "Orchestrated In place upgrades"
+          - "Certificate Refresh"
+          - "Certificate Refresh"
+          - "Certificate Refresh"
+          - "Certificate Refresh"
+          - "Certificate Refresh"
+          - "Certificate Refresh"
+          - "Certificate Refresh"
+          - "Certificate Refresh"
+          - "Certificate Refresh"
+          - "Workload cluster scaling"
+          - "Workload cluster scaling"
+          - "Workload cluster scaling"
+          - "Workload cluster scaling"
+          - "Workload cluster scaling"
+          - "Workload cluster scaling"
+          - "Workload cluster scaling"
+          - "Workload cluster scaling"
+          - "Workload cluster scaling"
+          - "Workload cluster scaling"
       # TODO(ben): Remove once all tests are running stable.
       fail-fast: false
     steps:

From fe87cb18352fe5a57c64b20267b0f58c1981ef99 Mon Sep 17 00:00:00 2001
From: Claudiu Belu
Date: Wed, 29 Jan 2025 10:13:50 +0000
Subject: [PATCH 2/2] Adds test service logs getter

---
 .github/workflows/e2e.yaml | 22 ++++++++++++++++++++++
 pebble-follower.sh         | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)
 create mode 100755 pebble-follower.sh

diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml
index d80ca95d..1877f4bd 100644
--- a/.github/workflows/e2e.yaml
+++ b/.github/workflows/e2e.yaml
@@ -124,7 +124,29 @@ jobs:
           sudo sysctl fs.inotify.max_user_instances=8192
       - name: Run e2e tests
         run: |
+          # Do not abort on a failing test run: the logs are still archived below.
+          set +e
+          ./pebble-follower.sh &
           sudo GINKGO_FOCUS="${{ matrix.ginkgo_focus }}" SKIP_RESOURCE_CLEANUP=true make test-e2e
+          exit_code="$?"
+
+          # Stop log collectors.
+          pkill -f "docker exec"
+
+          tar -czvf test-logs.tar.gz docker-ps.txt logs
+
+          # Generate a random string for the artefact name.
+          echo "RANDOM_STRING=$(tr -dc A-Za-z0-9 </dev/urandom | head -c 8)" >> "$GITHUB_ENV"
+          exit "$exit_code"
+
+      - name: Upload test service logs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: e2e-test-logs-${{ github.run_id }}-${{ env.RANDOM_STRING }}
+          path: |
+            test-logs.tar.gz
+
       - name: Change artifact permissions
         if: always()
         run: |
diff --git a/pebble-follower.sh b/pebble-follower.sh
new file mode 100755
index 00000000..246ed891
--- /dev/null
+++ b/pebble-follower.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+# Follows the pebble logs of each listed service inside every control-plane
+# container and appends them to logs/<container>/<service>.log.
+
+set -x
+
+services="containerd k8s-dqlite k8sd kube-apiserver kube-controller-manager kube-proxy kube-scheduler kubelet"
+
+mkdir -p logs
+
+# Poll every 15 seconds until docker lists exactly six control-plane containers.
+until [ "$(docker ps -a | grep -c "control-plane-")" = "6" ]; do
+  echo "Waiting for control plane nodes...";
+  sleep 15
+done
+
+# k8s services may still need to be initialized before we start collecting logs.
+sleep 30
+
+docker ps -a > docker-ps.txt
+
+while read -r container; do
+  # Keep only the last space-separated field of the `docker ps -a` row.
+  container_name="${container##* }"
+  mkdir -p "logs/$container_name"
+
+  # $services is left unquoted on purpose so it splits into one word per service.
+  for service in $services; do
+    nohup docker exec "$container_name" pebble logs -f -n 1000000 "$service" >> "logs/$container_name/$service.log" &
+  done
+done < <(docker ps -a | grep control-plane-)