Skip to content

Commit a2879a5

Browse files
jackfrancis authored and nojnhuh committed
ci-entrypoint.sh: Incrementally scale large clusters
Co-Authored-By: Jack Francis <[email protected]>
1 parent 6e606c6 commit a2879a5

File tree

1 file changed

+27
-17
lines changed

1 file changed

+27
-17
lines changed

scripts/ci-entrypoint.sh

Lines changed: 27 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -30,11 +30,14 @@ KIND="${REPO_ROOT}/hack/tools/bin/kind"
3030
KUSTOMIZE="${REPO_ROOT}/hack/tools/bin/kustomize"
3131
make --directory="${REPO_ROOT}" "${KUBECTL##*/}" "${HELM##*/}" "${KIND##*/}" "${KUSTOMIZE##*/}"
3232
KIND_CLUSTER_NAME="${KIND_CLUSTER_NAME:-capz}"
33-
WORKER_MACHINE_COUNT="${WORKER_MACHINE_COUNT:-2}"
33+
EXTRA_NODES_PER_SCALEOUT="${EXTRA_NODES_PER_SCALEOUT:-100}"
34+
export TOTAL_WORKER_MACHINE_COUNT="${WORKER_MACHINE_COUNT:-2}"
35+
WORKER_MACHINE_COUNT=0
3436
export KIND_CLUSTER_NAME
3537
# export the variables so they are available in bash -c wait_for_nodes below
3638
export KUBECTL
3739
export HELM
40+
export REPO_ROOT
3841

3942
# shellcheck source=hack/ensure-go.sh
4043
source "${REPO_ROOT}/hack/ensure-go.sh"
@@ -95,7 +98,7 @@ setup() {
9598
echo ''
9699
)}"
97100
export AZURE_RESOURCE_GROUP="${CLUSTER_NAME}"
98-
if [ "${WORKER_MACHINE_COUNT}" -gt "10" ]; then
101+
if [ "${TOTAL_WORKER_MACHINE_COUNT}" -gt "10" ]; then
99102
export AZURE_LOCATION="${AZURE_LOCATION:-$(capz::util::get_random_region_load)}"
100103
echo "Using AZURE_LOCATION: ${AZURE_LOCATION}"
101104
else
@@ -112,7 +115,7 @@ setup() {
112115
# Need a cluster with at least 2 nodes
113116
export CONTROL_PLANE_MACHINE_COUNT="${CONTROL_PLANE_MACHINE_COUNT:-1}"
114117
export CCM_COUNT="${CCM_COUNT:-1}"
115-
export WORKER_MACHINE_COUNT="${WORKER_MACHINE_COUNT:-2}"
118+
export WORKER_MACHINE_COUNT
116119
export EXP_CLUSTER_RESOURCE_SET="true"
117120

118121
# TODO figure out a better way to account for expected Windows node count
@@ -180,19 +183,26 @@ wait_for_copy_kubeadm_config_map() {
180183

181184
# wait_for_nodes returns when all nodes in the workload cluster are Ready.
182185
wait_for_nodes() {
183-
echo "Waiting for ${CONTROL_PLANE_MACHINE_COUNT} control plane machine(s), ${WORKER_MACHINE_COUNT} worker machine(s), and ${WINDOWS_WORKER_MACHINE_COUNT:-0} windows machine(s) to become Ready"
184-
185-
# Ensure that all nodes are registered with the API server before checking for readiness
186-
local total_nodes="$((CONTROL_PLANE_MACHINE_COUNT + WORKER_MACHINE_COUNT + WINDOWS_WORKER_MACHINE_COUNT))"
187-
while [[ $("${KUBECTL}" get nodes -ojson | jq '.items | length') -ne "${total_nodes}" ]]; do
188-
sleep 10
189-
done
190-
191-
until "${KUBECTL}" wait --for=condition=Ready node --all --timeout=15m; do
192-
sleep 5
193-
done
194-
until "${KUBECTL}" get nodes -o wide; do
195-
sleep 5
186+
while ((WORKER_MACHINE_COUNT < TOTAL_WORKER_MACHINE_COUNT)); do
187+
WORKER_MACHINE_COUNT=$((WORKER_MACHINE_COUNT + EXTRA_NODES_PER_SCALEOUT))
188+
WORKER_MACHINE_COUNT=$((WORKER_MACHINE_COUNT > TOTAL_WORKER_MACHINE_COUNT ? TOTAL_WORKER_MACHINE_COUNT : WORKER_MACHINE_COUNT))
189+
190+
"${KUBECTL}" --kubeconfig "${REPO_ROOT}/${KIND_CLUSTER_NAME}.kubeconfig" scale machinedeployment/"${CLUSTER_NAME}"-md-0 --replicas="${WORKER_MACHINE_COUNT}"
191+
192+
echo "Waiting for ${CONTROL_PLANE_MACHINE_COUNT} control plane machine(s), ${WORKER_MACHINE_COUNT} worker machine(s), and ${WINDOWS_WORKER_MACHINE_COUNT:-0} windows machine(s) to become Ready"
193+
194+
# Ensure that all nodes are registered with the API server before checking for readiness
195+
local total_nodes="$((CONTROL_PLANE_MACHINE_COUNT + WORKER_MACHINE_COUNT + WINDOWS_WORKER_MACHINE_COUNT))"
196+
while [[ $("${KUBECTL}" get nodes -ojson | jq '.items | length') -ne "${total_nodes}" ]]; do
197+
sleep 10
198+
done
199+
200+
until "${KUBECTL}" wait --for=condition=Ready node --all --timeout=15m; do
201+
sleep 5
202+
done
203+
until "${KUBECTL}" get nodes -o wide; do
204+
sleep 5
205+
done
196206
done
197207
}
198208

@@ -221,7 +231,7 @@ install_addons() {
221231
# we need to wait a little bit for nodes and pods terminal state,
222232
# so we block successful return upon the cluster being fully operational.
223233
export -f wait_for_nodes
224-
timeout --foreground 1800 bash -c wait_for_nodes
234+
timeout --foreground "$((TOTAL_WORKER_MACHINE_COUNT > 100 ? 10800 : 1800))" bash -c wait_for_nodes
225235
export -f wait_for_pods
226236
timeout --foreground 1800 bash -c wait_for_pods
227237
}

0 commit comments

Comments
 (0)