Skip to content

Commit 65d0034

Browse files
jackfrancis and nojnhuh
authored and committed
ci-entrypoint.sh: Incrementally scale large clusters
Co-Authored-By: Jack Francis <[email protected]>
1 parent 6e606c6 commit 65d0034

File tree

1 file changed

+28
-17
lines changed

1 file changed

+28
-17
lines changed

scripts/ci-entrypoint.sh

Lines changed: 28 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -30,11 +30,15 @@ KIND="${REPO_ROOT}/hack/tools/bin/kind"
3030
KUSTOMIZE="${REPO_ROOT}/hack/tools/bin/kustomize"
3131
make --directory="${REPO_ROOT}" "${KUBECTL##*/}" "${HELM##*/}" "${KIND##*/}" "${KUSTOMIZE##*/}"
3232
KIND_CLUSTER_NAME="${KIND_CLUSTER_NAME:-capz}"
33-
WORKER_MACHINE_COUNT="${WORKER_MACHINE_COUNT:-2}"
33+
EXTRA_NODES_PER_SCALEOUT="${EXTRA_NODES_PER_SCALEOUT:-100}"
34+
export TOTAL_WORKER_MACHINE_COUNT="${WORKER_MACHINE_COUNT:-2}"
35+
WORKER_MACHINE_COUNT=0
3436
export KIND_CLUSTER_NAME
3537
# export the variables so they are available in bash -c wait_for_nodes below
3638
export KUBECTL
3739
export HELM
40+
export REPO_ROOT
41+
export EXTRA_NODES_PER_SCALEOUT
3842

3943
# shellcheck source=hack/ensure-go.sh
4044
source "${REPO_ROOT}/hack/ensure-go.sh"
@@ -95,7 +99,7 @@ setup() {
9599
echo ''
96100
)}"
97101
export AZURE_RESOURCE_GROUP="${CLUSTER_NAME}"
98-
if [ "${WORKER_MACHINE_COUNT}" -gt "10" ]; then
102+
if [ "${TOTAL_WORKER_MACHINE_COUNT}" -gt "10" ]; then
99103
export AZURE_LOCATION="${AZURE_LOCATION:-$(capz::util::get_random_region_load)}"
100104
echo "Using AZURE_LOCATION: ${AZURE_LOCATION}"
101105
else
@@ -112,7 +116,7 @@ setup() {
112116
# Need a cluster with at least 2 nodes
113117
export CONTROL_PLANE_MACHINE_COUNT="${CONTROL_PLANE_MACHINE_COUNT:-1}"
114118
export CCM_COUNT="${CCM_COUNT:-1}"
115-
export WORKER_MACHINE_COUNT="${WORKER_MACHINE_COUNT:-2}"
119+
export WORKER_MACHINE_COUNT
116120
export EXP_CLUSTER_RESOURCE_SET="true"
117121

118122
# TODO figure out a better way to account for expected Windows node count
@@ -180,19 +184,26 @@ wait_for_copy_kubeadm_config_map() {
180184

181185
# wait_for_nodes returns when all nodes in the workload cluster are Ready.
182186
wait_for_nodes() {
183-
echo "Waiting for ${CONTROL_PLANE_MACHINE_COUNT} control plane machine(s), ${WORKER_MACHINE_COUNT} worker machine(s), and ${WINDOWS_WORKER_MACHINE_COUNT:-0} windows machine(s) to become Ready"
184-
185-
# Ensure that all nodes are registered with the API server before checking for readiness
186-
local total_nodes="$((CONTROL_PLANE_MACHINE_COUNT + WORKER_MACHINE_COUNT + WINDOWS_WORKER_MACHINE_COUNT))"
187-
while [[ $("${KUBECTL}" get nodes -ojson | jq '.items | length') -ne "${total_nodes}" ]]; do
188-
sleep 10
189-
done
190-
191-
until "${KUBECTL}" wait --for=condition=Ready node --all --timeout=15m; do
192-
sleep 5
193-
done
194-
until "${KUBECTL}" get nodes -o wide; do
195-
sleep 5
187+
while ((WORKER_MACHINE_COUNT < TOTAL_WORKER_MACHINE_COUNT)); do
188+
WORKER_MACHINE_COUNT=$((WORKER_MACHINE_COUNT + EXTRA_NODES_PER_SCALEOUT))
189+
WORKER_MACHINE_COUNT=$((WORKER_MACHINE_COUNT > TOTAL_WORKER_MACHINE_COUNT ? TOTAL_WORKER_MACHINE_COUNT : WORKER_MACHINE_COUNT))
190+
191+
"${KUBECTL}" --kubeconfig "${REPO_ROOT}/${KIND_CLUSTER_NAME}.kubeconfig" scale --namespace default machinedeployment/"${CLUSTER_NAME}"-md-0 --replicas="${WORKER_MACHINE_COUNT}"
192+
193+
echo "Waiting for ${CONTROL_PLANE_MACHINE_COUNT} control plane machine(s), ${WORKER_MACHINE_COUNT} worker machine(s), and ${WINDOWS_WORKER_MACHINE_COUNT:-0} windows machine(s) to become Ready"
194+
195+
# Ensure that all nodes are registered with the API server before checking for readiness
196+
local total_nodes="$((CONTROL_PLANE_MACHINE_COUNT + WORKER_MACHINE_COUNT + WINDOWS_WORKER_MACHINE_COUNT))"
197+
while [[ $("${KUBECTL}" get nodes -ojson | jq '.items | length') -ne "${total_nodes}" ]]; do
198+
sleep 10
199+
done
200+
201+
until "${KUBECTL}" wait --for=condition=Ready node --all --timeout=15m; do
202+
sleep 5
203+
done
204+
until "${KUBECTL}" get nodes -o wide; do
205+
sleep 5
206+
done
196207
done
197208
}
198209

@@ -221,7 +232,7 @@ install_addons() {
221232
# we need to wait a little bit for nodes and pods terminal state,
222233
# so we block successful return upon the cluster being fully operational.
223234
export -f wait_for_nodes
224-
timeout --foreground 1800 bash -c wait_for_nodes
235+
timeout --foreground "$((TOTAL_WORKER_MACHINE_COUNT > 100 ? 10800 : 1800))" bash -c wait_for_nodes
225236
export -f wait_for_pods
226237
timeout --foreground 1800 bash -c wait_for_pods
227238
}

0 commit comments

Comments
 (0)