Skip to content

Commit 5467b56

Browse files
adamancini and claude committed
refactor: separate cluster creation from test deployment for parallel execution
- Split test-deployment job into create-clusters and test-deployment jobs
- Enable parallel cluster creation (max-parallel: 7) for all matrix combinations
- Enable parallel test execution after clusters are ready
- Improve resource utilization and reduce total workflow time
- Add cluster matrix output for better job coordination

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent e592a75 commit 5467b56

File tree

1 file changed

+186
-5
lines changed

1 file changed

+186
-5
lines changed

.github/workflows/wg-easy-pr-validation.yaml

Lines changed: 186 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ jobs:
247247
echo "license-id=${{ steps.create-customer.outputs.license-id }}" >> $GITHUB_OUTPUT
248248
fi
249249
250-
test-deployment:
250+
create-clusters:
251251
runs-on: ubuntu-22.04
252252
needs: [setup, create-resources]
253253
strategy:
@@ -294,9 +294,9 @@ jobs:
294294
timeout-minutes: 30
295295
exclude: []
296296
fail-fast: false
297-
max-parallel: 4
297+
max-parallel: 7 # Allow all clusters to be created in parallel
298298
outputs:
299-
cluster-id: ${{ steps.set-cluster-outputs.outputs.cluster-id }}
299+
cluster-matrix: ${{ steps.set-cluster-matrix.outputs.cluster-matrix }}
300300
steps:
301301
- name: Set concurrency group
302302
run: |
@@ -500,10 +500,10 @@ jobs:
500500
--name "$CLUSTER_NAME" \
501501
--distribution "${{ matrix.distribution }}" \
502502
--version "${{ matrix.k8s-version }}" \
503-
--disk "${{ steps.dist-config.outputs.cluster-disk-size }}" \
503+
--disk "50" \
504504
--instance-type "${{ matrix.instance-type }}" \
505505
--nodes "${{ matrix.nodes }}" \
506-
--ttl "${{ steps.dist-config.outputs.cluster-ttl }}"
506+
--ttl "${{ matrix.distribution == 'eks' && '6h' || '4h' }}"
507507
508508
CLUSTER_CREATE_EXIT_CODE=$?
509509
if [ $CLUSTER_CREATE_EXIT_CODE -ne 0 ]; then
@@ -627,6 +627,187 @@ jobs:
627627
echo "Cluster info:"
628628
kubectl cluster-info
629629
630+
- name: Set cluster matrix output
631+
id: set-cluster-matrix
632+
run: |
633+
# Create cluster info for test deployment job
634+
K8S_VERSION_NORMALIZED=$(echo "${{ matrix.k8s-version }}" | tr '.' '-')
635+
CLUSTER_NAME="${{ needs.setup.outputs.channel-name }}-$K8S_VERSION_NORMALIZED-${{ matrix.distribution }}"
636+
637+
CLUSTER_ID="${{ steps.set-cluster-outputs.outputs.cluster-id }}"
638+
639+
# Create cluster matrix entry
640+
CLUSTER_ENTRY='{"k8s-version":"${{ matrix.k8s-version }}","distribution":"${{ matrix.distribution }}","nodes":${{ matrix.nodes }},"instance-type":"${{ matrix.instance-type }}","timeout-minutes":${{ matrix.timeout-minutes }},"cluster-id":"'$CLUSTER_ID'","cluster-name":"'$CLUSTER_NAME'"}'
641+
642+
echo "cluster-matrix=$CLUSTER_ENTRY" >> $GITHUB_OUTPUT
643+
echo "Created cluster matrix entry: $CLUSTER_ENTRY"
644+
645+
test-deployment:
646+
runs-on: ubuntu-22.04
647+
needs: [setup, create-resources, create-clusters]
648+
strategy:
649+
matrix:
650+
include:
651+
# k3s single-node configurations (latest patch versions)
652+
- k8s-version: "v1.31.10"
653+
distribution: "k3s"
654+
nodes: 1
655+
instance-type: "r1.small"
656+
timeout-minutes: 15
657+
- k8s-version: "v1.32.6"
658+
distribution: "k3s"
659+
nodes: 1
660+
instance-type: "r1.small"
661+
timeout-minutes: 15
662+
# k3s multi-node configurations
663+
- k8s-version: "v1.32.6"
664+
distribution: "k3s"
665+
nodes: 3
666+
instance-type: "r1.medium"
667+
timeout-minutes: 20
668+
# kind configurations (maximum 1 node supported, distribution-specific patch versions)
669+
- k8s-version: "v1.31.9"
670+
distribution: "kind"
671+
nodes: 1
672+
instance-type: "r1.small"
673+
timeout-minutes: 20
674+
- k8s-version: "v1.32.5"
675+
distribution: "kind"
676+
nodes: 1
677+
instance-type: "r1.small"
678+
timeout-minutes: 20
679+
# EKS configurations (major.minor versions only)
680+
- k8s-version: "v1.31"
681+
distribution: "eks"
682+
nodes: 2
683+
instance-type: "c5.large"
684+
timeout-minutes: 30
685+
- k8s-version: "v1.32"
686+
distribution: "eks"
687+
nodes: 2
688+
instance-type: "c5.large"
689+
timeout-minutes: 30
690+
exclude: []
691+
fail-fast: false
692+
max-parallel: 7 # Allow all tests to run in parallel
693+
steps:
694+
- name: Set concurrency group
695+
run: |
696+
echo "CONCURRENCY_GROUP=test-${{ needs.setup.outputs.channel-name }}-${{ matrix.k8s-version }}-${{ matrix.distribution }}" >> $GITHUB_ENV
697+
echo "Starting test job: ${{ matrix.k8s-version }}-${{ matrix.distribution }}-${{ matrix.nodes }}nodes"
698+
699+
- name: Checkout code
700+
uses: actions/checkout@v4
701+
702+
- name: Setup tools
703+
uses: ./.github/actions/setup-tools
704+
with:
705+
helm-version: ${{ env.HELM_VERSION }}
706+
install-helmfile: 'true'
707+
708+
- name: Get cluster kubeconfig
709+
shell: bash
710+
run: |
711+
# Normalize cluster name to match task expectations (replace dots with dashes)
712+
K8S_VERSION_NORMALIZED=$(echo "${{ matrix.k8s-version }}" | tr '.' '-')
713+
CLUSTER_NAME="${{ needs.setup.outputs.channel-name }}-$K8S_VERSION_NORMALIZED-${{ matrix.distribution }}"
714+
715+
echo "Getting kubeconfig for cluster: $CLUSTER_NAME"
716+
717+
# Get kubeconfig using replicated CLI
718+
replicated cluster kubeconfig --name "$CLUSTER_NAME" --output-path /tmp/kubeconfig
719+
720+
if [ ! -f /tmp/kubeconfig ] || [ ! -s /tmp/kubeconfig ]; then
721+
echo "ERROR: Failed to get kubeconfig for cluster $CLUSTER_NAME"
722+
echo "Available clusters:"
723+
replicated cluster ls
724+
exit 1
725+
fi
726+
727+
echo "KUBECONFIG=/tmp/kubeconfig" >> $GITHUB_ENV
728+
echo "Successfully retrieved kubeconfig for cluster $CLUSTER_NAME"
729+
730+
- name: Deploy application
731+
working-directory: ${{ env.APP_DIR }}
732+
run: |
733+
# Normalize cluster name to match task expectations (replace dots with dashes)
734+
K8S_VERSION_NORMALIZED=$(echo "${{ matrix.k8s-version }}" | tr '.' '-')
735+
CLUSTER_NAME="${{ needs.setup.outputs.channel-name }}-$K8S_VERSION_NORMALIZED-${{ matrix.distribution }}"
736+
task customer-helm-install \
737+
CUSTOMER_NAME="${{ needs.setup.outputs.customer-name }}" \
738+
CLUSTER_NAME="$CLUSTER_NAME" \
739+
CHANNEL_SLUG="${{ needs.create-resources.outputs.channel-slug }}" \
740+
REPLICATED_LICENSE_ID="${{ needs.create-resources.outputs.license-id }}"
741+
timeout-minutes: ${{ matrix.timeout-minutes }}
742+
743+
- name: Run tests
744+
working-directory: ${{ env.APP_DIR }}
745+
run: task test
746+
timeout-minutes: 10
747+
748+
- name: Run distribution-specific tests
749+
run: |
750+
echo "Running ${{ matrix.distribution }}-specific tests..."
751+
752+
# Test node configuration based on matrix
753+
EXPECTED_NODES=${{ matrix.nodes }}
754+
ACTUAL_NODES=$(kubectl get nodes --no-headers | wc -l)
755+
756+
if [ "$ACTUAL_NODES" -eq "$EXPECTED_NODES" ]; then
757+
echo "✅ Node count validation passed: $ACTUAL_NODES/$EXPECTED_NODES"
758+
else
759+
echo "❌ Node count validation failed: $ACTUAL_NODES/$EXPECTED_NODES"
760+
exit 1
761+
fi
762+
763+
# Distribution-specific storage tests
764+
case "${{ matrix.distribution }}" in
765+
"k3s")
766+
echo "Testing k3s local-path storage..."
767+
kubectl get storageclass local-path -o yaml | grep provisioner | grep rancher.io/local-path
768+
;;
769+
"kind")
770+
echo "Testing kind standard storage..."
771+
kubectl get storageclass standard -o yaml | grep provisioner | grep rancher.io/local-path
772+
;;
773+
"eks")
774+
echo "Testing EKS GP2 storage..."
775+
kubectl get storageclass gp2 -o yaml | grep provisioner | grep ebs.csi.aws.com || echo "EKS storage validation skipped"
776+
;;
777+
esac
778+
779+
# Test cluster resources
780+
echo "Cluster resource utilization:"
781+
kubectl top nodes --no-headers 2>/dev/null || echo "Metrics not available"
782+
783+
echo "Pod distribution across nodes:"
784+
kubectl get pods -A --no-headers -o custom-columns=NODE:.spec.nodeName | sort | uniq -c  # count pods per node; reads the node name directly instead of a positional column of the wide table
785+
786+
# Performance monitoring
787+
echo "=== Performance Metrics ==="
788+
echo "Test Environment: ${{ matrix.distribution }} ${{ matrix.k8s-version }} (${{ matrix.nodes }} nodes)"
789+
echo "Instance Type: ${{ matrix.instance-type }}"
790+
echo "Deployment Timeout: ${{ matrix.timeout-minutes }} minutes"
791+
792+
# Resource consumption validation
793+
echo "=== Resource Validation ==="
794+
kubectl describe nodes | grep -E "(Name:|Allocatable:|Allocated resources:)" | head -20
795+
796+
# Collect performance timings
797+
echo "=== Test Completion Summary ==="
798+
echo "Matrix Job: ${{ matrix.k8s-version }}-${{ matrix.distribution }}-${{ matrix.nodes }}nodes"
799+
echo "Completed: $(date -u)"  # date is evaluated here, at the end of the step, so it is the completion time
800+
echo "Status: Complete"
801+
802+
- name: Upload debug logs
803+
if: failure()
804+
uses: actions/upload-artifact@v4
805+
with:
806+
name: debug-logs-${{ github.run_number }}-${{ matrix.k8s-version }}-${{ matrix.distribution }}-${{ matrix.nodes }}nodes  # node count keeps the name unique across matrix entries sharing version and distribution
807+
path: |
808+
/tmp/*.log
809+
~/.replicated/
810+
630811
- name: Deploy application
631812
working-directory: ${{ env.APP_DIR }}
632813
run: |

0 commit comments

Comments
 (0)