@@ -131,36 +131,42 @@ create_cluster() {
     "${REPO_ROOT}/hack/create-dev-cluster.sh"
 }
 
-install_addons() {
+# get_cidrs derives the CIDR from the Cluster's '.spec.clusterNetwork.pods.cidrBlocks' metadata
+# any retryable operation in this function must return a non-zero exit code on failure so that we can
+# retry it using a `until get_cidrs; do sleep 5; done` pattern;
+# and any statement must be idempotent so that subsequent retry attempts can make forward progress.
+get_cidrs() {
     # Get cluster CIDRs from Cluster object
     CIDR0=$(${KUBECTL} get cluster "${CLUSTER_NAME}" -o=jsonpath='{.spec.clusterNetwork.pods.cidrBlocks[0]}')
-    CIDR1=$(${KUBECTL} get cluster "${CLUSTER_NAME}" -o=jsonpath='{.spec.clusterNetwork.pods.cidrBlocks[1]}' 2>/dev/null) || true
-
-    # export the target cluster KUBECONFIG if not already set
-    export KUBECONFIG="${KUBECONFIG:-${PWD}/kubeconfig}"
-
-    # wait for the apiserver pod to be Ready.
-    APISERVER_POD=$("${KUBECTL}" get pods -n kube-system -o name | grep apiserver)
-    "${KUBECTL}" wait --for=condition=Ready -n kube-system "${APISERVER_POD}" --timeout=5m
-
-    # Copy the kubeadm configmap to the calico-system namespace. This is a workaround needed for the calico-node-windows daemonset to be able to run in the calico-system namespace.
-    "${KUBECTL}" create ns calico-system
-    until "${KUBECTL}" get configmap kubeadm-config --namespace=kube-system
-    do
-        # Wait for the kubeadm-config configmap to exist.
-        sleep 2
-    done
-    "${KUBECTL}" get configmap kubeadm-config --namespace=kube-system -o yaml \
-    | sed 's/namespace: kube-system/namespace: calico-system/' \
-    | "${KUBECTL}" create -f -
+    export CIDR0
+    CIDR_LENGTH=$(${KUBECTL} get cluster "${CLUSTER_NAME}" -o=jsonpath='{.spec.clusterNetwork.pods.cidrBlocks}' | jq '. | length')
+    if [[ "${CIDR_LENGTH}" == "2" ]]; then
+        CIDR1=$(${KUBECTL} get cluster "${CLUSTER_NAME}" -o=jsonpath='{.spec.clusterNetwork.pods.cidrBlocks[1]}')
+        export CIDR1
+    fi
+}
 
+# install_calico installs Calico CNI componentry onto the Cluster
+# any retryable operation in this function must return a non-zero exit code on failure so that we can
+# retry it using a `until install_calico; do sleep 5; done` pattern;
+# and any statement must be idempotent so that subsequent retry attempts can make forward progress.
+install_calico() {
+    # Copy the kubeadm configmap to the calico-system namespace.
+    # This is a workaround needed for the calico-node-windows daemonset
+    # to be able to run in the calico-system namespace.
+    "${KUBECTL}" create namespace calico-system --dry-run=client -o yaml | kubectl apply -f -
+    if ! "${KUBECTL}" get configmap kubeadm-config --namespace=calico-system; then
+        "${KUBECTL}" get configmap kubeadm-config --namespace=kube-system -o yaml > kubeadm-config-kube-system
+        sed 's/namespace: kube-system/namespace: calico-system/' kubeadm-config-kube-system | "${KUBECTL}" apply -f -
+        rm kubeadm-config-kube-system
+    fi
     # install Calico CNI
     echo "Installing Calico CNI via helm"
-    if [[ "${CIDR0}" =~ .*:.* ]]; then
+    if [[ "${CIDR0:-}" =~ .*:.* ]]; then
         echo "Cluster CIDR is IPv6"
         CALICO_VALUES_FILE="${REPO_ROOT}/templates/addons/calico-ipv6/values.yaml"
         CIDR_STRING_VALUES="installation.calicoNetwork.ipPools[0].cidr=${CIDR0}"
-    elif [[ "${CIDR1}" =~ .*:.* ]]; then
+    elif [[ "${CIDR1:-}" =~ .*:.* ]]; then
        echo "Cluster CIDR is dual-stack"
         CALICO_VALUES_FILE="${REPO_ROOT}/templates/addons/calico-dual-stack/values.yaml"
         CIDR_STRING_VALUES="installation.calicoNetwork.ipPools[0].cidr=${CIDR0},installation.calicoNetwork.ipPools[1].cidr=${CIDR1}"
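
Editor's note (not part of the diff): get_cidrs above decides single- vs dual-stack by counting the entries of cidrBlocks with jq, and install_calico then branches on whether a CIDR contains ':'. A minimal standalone sketch of the same derivation, assuming kubectl, jq, and a CAPI Cluster object are available and that KUBECTL/CLUSTER_NAME are set as in the script (the defaults below are hypothetical):

#!/usr/bin/env bash
# Sketch: derive pod CIDRs the way get_cidrs does.
set -o errexit -o nounset -o pipefail

KUBECTL="${KUBECTL:-kubectl}"
CLUSTER_NAME="${CLUSTER_NAME:-my-cluster}"   # hypothetical default, for illustration only

# jsonpath prints the array field as JSON, e.g. ["10.244.0.0/16","2001:1234:5678:9a40::/58"]
blocks_json=$(${KUBECTL} get cluster "${CLUSTER_NAME}" -o=jsonpath='{.spec.clusterNetwork.pods.cidrBlocks}')
len=$(echo "${blocks_json}" | jq '. | length')

cidr0=$(echo "${blocks_json}" | jq -r '.[0]')
echo "primary pod CIDR: ${cidr0}"
if [[ "${len}" == "2" ]]; then
    cidr1=$(echo "${blocks_json}" | jq -r '.[1]')
    echo "secondary pod CIDR (dual-stack): ${cidr1}"
fi
# An IPv6 CIDR contains ':', which is exactly what the install_calico regex checks.
if [[ "${cidr0}" =~ .*:.* ]]; then echo "primary CIDR is IPv6"; fi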
@@ -169,60 +175,49 @@ install_addons() {
         CALICO_VALUES_FILE="${REPO_ROOT}/templates/addons/calico/values.yaml"
         CIDR_STRING_VALUES="installation.calicoNetwork.ipPools[0].cidr=${CIDR0}"
     fi
+    "${HELM}" upgrade calico --install --repo https://projectcalico.docs.tigera.io/charts tigera-operator -f "${CALICO_VALUES_FILE}" --set-string "${CIDR_STRING_VALUES}" --namespace calico-system
+}
 
-    "${HELM}" repo add projectcalico https://projectcalico.docs.tigera.io/charts
-    "${HELM}" install calico projectcalico/tigera-operator -f "${CALICO_VALUES_FILE}" --set-string "${CIDR_STRING_VALUES}" --namespace tigera-operator --create-namespace
-
-    # Add FeatureOverride for ChecksumOffloadBroken in FelixConfiguration.
-    # This is the recommended workaround for https://github.com/projectcalico/calico/issues/3145.
-    "${KUBECTL}" apply -f "${REPO_ROOT}"/templates/addons/calico/felix-override.yaml
-
-    # install cloud-provider-azure components, if using out-of-tree
-    if [[ -n "${TEST_CCM:-}" ]]; then
-        CLOUD_CONFIG="/etc/kubernetes/azure.json"
-        CONFIG_SECRET_NAME=""
-        ENABLE_DYNAMIC_RELOADING=false
-        if [[ -n "${LOAD_CLOUD_CONFIG_FROM_SECRET:-}" ]]; then
-            CLOUD_CONFIG=""
-            CONFIG_SECRET_NAME="azure-cloud-provider"
-            ENABLE_DYNAMIC_RELOADING=true
-            copy_secret
-        fi
-
-        CCM_CLUSTER_CIDR="${CIDR0}"
-        if [[ -n "${CIDR1}" ]]; then
-            CCM_CLUSTER_CIDR="${CIDR0}\,${CIDR1}"
-        fi
-        echo "CCM cluster CIDR: ${CCM_CLUSTER_CIDR:-}"
-
-        export CCM_LOG_VERBOSITY="${CCM_LOG_VERBOSITY:-4}"
-        echo "Installing cloud-provider-azure components via helm"
-        "${HELM}" install --repo https://raw.githubusercontent.com/kubernetes-sigs/cloud-provider-azure/master/helm/repo cloud-provider-azure --generate-name \
-            --set infra.clusterName="${CLUSTER_NAME}" \
-            --set cloudControllerManager.imageRepository="${IMAGE_REGISTRY}" \
-            --set cloudNodeManager.imageRepository="${IMAGE_REGISTRY}" \
-            --set cloudControllerManager.imageName="${CCM_IMAGE_NAME}" \
-            --set cloudNodeManager.imageName="${CNM_IMAGE_NAME}" \
-            --set-string cloudControllerManager.imageTag="${IMAGE_TAG}" \
-            --set-string cloudNodeManager.imageTag="${IMAGE_TAG}" \
-            --set cloudControllerManager.replicas="${CCM_COUNT}" \
-            --set cloudControllerManager.enableDynamicReloading="${ENABLE_DYNAMIC_RELOADING}" \
-            --set cloudControllerManager.cloudConfig="${CLOUD_CONFIG}" \
-            --set cloudControllerManager.cloudConfigSecretName="${CONFIG_SECRET_NAME}" \
-            --set cloudControllerManager.logVerbosity="${CCM_LOG_VERBOSITY}" \
-            --set-string cloudControllerManager.clusterCIDR="${CCM_CLUSTER_CIDR}"
+# install_cloud_provider_azure installs OOT cloud-provider-azure componentry onto the Cluster.
+# Any retryable operation in this function must return a non-zero exit code on failure so that we can
+# retry it using a `until install_cloud_provider_azure; do sleep 5; done` pattern;
+# and any statement must be idempotent so that subsequent retry attempts can make forward progress.
+install_cloud_provider_azure() {
+    CLOUD_CONFIG="/etc/kubernetes/azure.json"
+    CONFIG_SECRET_NAME=""
+    ENABLE_DYNAMIC_RELOADING=false
+    if [[ -n "${LOAD_CLOUD_CONFIG_FROM_SECRET:-}" ]]; then
+        CLOUD_CONFIG=""
+        CONFIG_SECRET_NAME="azure-cloud-provider"
+        ENABLE_DYNAMIC_RELOADING=true
+        copy_secret
     fi
 
-    export -f wait_for_nodes
-    timeout --foreground 1800 bash -c wait_for_nodes
-
-    echo "Waiting for all calico-system pods to be ready"
-    "${KUBECTL}" wait --for=condition=Ready pod -n calico-system --all --timeout=10m
-
-    echo "Waiting for all kube-system pods to be ready"
-    "${KUBECTL}" wait --for=condition=Ready pod -n kube-system --all --timeout=10m
+    CCM_CLUSTER_CIDR="${CIDR0}"
+    if [[ -n "${CIDR1}" ]]; then
+        CCM_CLUSTER_CIDR="${CIDR0}\,${CIDR1}"
+    fi
+    echo "CCM cluster CIDR: ${CCM_CLUSTER_CIDR:-}"
+
+    export CCM_LOG_VERBOSITY="${CCM_LOG_VERBOSITY:-4}"
+    echo "Installing cloud-provider-azure components via helm"
+    "${HELM}" upgrade cloud-provider-azure --install --repo https://raw.githubusercontent.com/kubernetes-sigs/cloud-provider-azure/master/helm/repo cloud-provider-azure \
+        --set infra.clusterName="${CLUSTER_NAME}" \
+        --set cloudControllerManager.imageRepository="${IMAGE_REGISTRY}" \
+        --set cloudNodeManager.imageRepository="${IMAGE_REGISTRY}" \
+        --set cloudControllerManager.imageName="${CCM_IMAGE_NAME}" \
+        --set cloudNodeManager.imageName="${CNM_IMAGE_NAME}" \
+        --set-string cloudControllerManager.imageTag="${IMAGE_TAG}" \
+        --set-string cloudNodeManager.imageTag="${IMAGE_TAG}" \
+        --set cloudControllerManager.replicas="${CCM_COUNT}" \
+        --set cloudControllerManager.enableDynamicReloading="${ENABLE_DYNAMIC_RELOADING}" \
+        --set cloudControllerManager.cloudConfig="${CLOUD_CONFIG}" \
+        --set cloudControllerManager.cloudConfigSecretName="${CONFIG_SECRET_NAME}" \
+        --set cloudControllerManager.logVerbosity="${CCM_LOG_VERBOSITY}" \
+        --set-string cloudControllerManager.clusterCIDR="${CCM_CLUSTER_CIDR}"
 }
 
+# wait_for_nodes returns when all nodes in the workload cluster are Ready.
 wait_for_nodes() {
     echo "Waiting for ${CONTROL_PLANE_MACHINE_COUNT} control plane machine(s), ${WORKER_MACHINE_COUNT} worker machine(s), and ${WINDOWS_WORKER_MACHINE_COUNT:-0} windows machine(s) to become Ready"
 
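
Editor's note (not part of the diff): the backslash in CCM_CLUSTER_CIDR="${CIDR0}\,${CIDR1}" above is there because Helm's --set/--set-string parser treats an unescaped comma as a separator between key=value pairs, so a literal comma inside a value must be escaped. A small sketch of the quoting, with a hypothetical release, chart, and key name:

# Sketch: passing a value that contains a literal comma to helm --set-string.
CIDR0="10.244.0.0/16"
CIDR1="2001:1234:5678:9a40::/58"

# Without the backslash, helm would split on the comma and reject the second half as "not key=value".
CLUSTER_CIDR="${CIDR0}\,${CIDR1}"

helm upgrade my-release example/chart --install \
    --set-string controller.clusterCIDR="${CLUSTER_CIDR}"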
@@ -232,8 +227,54 @@ wait_for_nodes() {
         sleep 10
     done
 
-    "${KUBECTL}" wait --for=condition=Ready node --all --timeout=5m
-    "${KUBECTL}" get nodes -owide
+    until "${KUBECTL}" wait --for=condition=Ready node --all --timeout=15m; do
+        sleep 5
+    done
+    until "${KUBECTL}" get nodes -o wide; do
+        sleep 5
+    done
+}
+
+# wait_for_pods returns when all pods on the workload cluster are Running.
+wait_for_pods() {
+    echo "Waiting for all pod init containers scheduled in the cluster to be ready"
+    while "${KUBECTL}" get pods --all-namespaces -o jsonpath="{.items[*].status.initContainerStatuses[*].ready}" | grep -q false; do
+        echo "Not all pod init containers are Ready...."
+        sleep 5
+    done
+
+    echo "Waiting for all pod containers scheduled in the cluster to be ready"
+    while "${KUBECTL}" get pods --all-namespaces -o jsonpath="{.items[*].status.containerStatuses[*].ready}" | grep -q false; do
+        echo "Not all pod containers are Ready...."
+        sleep 5
+    done
+    until "${KUBECTL}" get pods --all-namespaces -o wide; do
+        sleep 5
+    done
+}
+
+install_addons() {
+    until get_cidrs; do
+        sleep 5
+    done
+    # export the target cluster KUBECONFIG if not already set
+    export KUBECONFIG="${KUBECONFIG:-${PWD}/kubeconfig}"
+    until install_calico; do
+        sleep 5
+    done
+    # install cloud-provider-azure components, if using out-of-tree
+    if [[ -n "${TEST_CCM:-}" ]]; then
+        until install_cloud_provider_azure; do
+            sleep 5
+        done
+    fi
+    # In order to determine the successful outcome of CNI and cloud-provider-azure,
+    # we need to wait a little bit for nodes and pods terminal state,
+    # so we block successful return upon the cluster being fully operational.
+    export -f wait_for_nodes
+    timeout --foreground 1800 bash -c wait_for_nodes
+    export -f wait_for_pods
+    timeout --foreground 1800 bash -c wait_for_pods
 }
 
 copy_secret() {
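
Editor's note (not part of the diff): the readiness probe in wait_for_pods above works because the jsonpath expression flattens every container's 'ready' field into one whitespace-separated string of true/false tokens, and 'grep -q false' exits 0 (keeping the while loop spinning) as long as any token is false. A standalone sketch of the same probe, assuming KUBECONFIG already points at the target cluster:

# Sketch: loop until every pod container in the cluster reports ready=true.
KUBECTL="${KUBECTL:-kubectl}"

all_containers_ready() {
    # Output looks like "true true false true ..."; grep -q false succeeds when any container is not ready,
    # so we negate it to turn "nothing is false" into a zero exit code.
    ! "${KUBECTL}" get pods --all-namespaces \
        -o jsonpath="{.items[*].status.containerStatuses[*].ready}" | grep -q false
}

until all_containers_ready; do
    echo "Not all pod containers are Ready yet..."
    sleep 5
done
echo "All pod containers are Ready"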
@@ -256,10 +297,9 @@ cleanup() {
 
 on_exit() {
     if [[ -n ${KUBECONFIG:-} ]]; then
-        "${KUBECTL}" get nodes -owide || echo "Unable to get nodes"
-        "${KUBECTL}" get pods -A -owide || echo "Unable to get pods"
+        "${KUBECTL}" get nodes -o wide || echo "Unable to get nodes"
+        "${KUBECTL}" get pods -A -o wide || echo "Unable to get pods"
     fi
-
     # unset kubeconfig which is currently pointing at workload cluster.
     # we want to be pointing at the management cluster (kind in this case)
     unset KUBECONFIG
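
Editor's note (not part of the diff): the '|| echo' guards in on_exit above matter because a handler like this is typically registered with an EXIT trap in an errexit script, where a failing diagnostic command could otherwise cut the cleanup short. A minimal sketch of that best-effort pattern; the trap registration and the cleanup step are assumptions, not shown in this hunk:

# Sketch: an exit handler whose diagnostic commands must never abort the cleanup.
set -o errexit

on_exit() {
    # Best-effort diagnostics; '|| echo' keeps a failed kubectl call from stopping the handler.
    kubectl get nodes -o wide || echo "Unable to get nodes"
    kubectl get pods -A -o wide || echo "Unable to get pods"
    # Real cleanup continues regardless of whether the diagnostics succeeded.
    rm -f /tmp/scratch-kubeconfig   # hypothetical cleanup step
}

trap on_exit EXIT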
@@ -282,6 +322,7 @@ create_cluster
 
 # install CNI and CCM
 install_addons
+echo "Cluster ${CLUSTER_NAME} created and fully operational"
 
 if [[ "${#}" -gt 0 ]]; then
     # disable error exit so we can run post-command cleanup
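
Editor's note (not part of the diff): install_addons, called just above, bounds its final waits with 'timeout --foreground 1800 bash -c wait_for_nodes'. That spawns a fresh bash process, which only knows the function because 'export -f' placed its definition in the environment first. A small self-contained sketch of the same bounded-wait pattern, using a hypothetical function and marker file:

# Sketch: run a shell function under a wall-clock timeout by exporting it to a child bash.
wait_for_something() {
    # hypothetical readiness condition, purely for illustration
    until [[ -f /tmp/ready-marker ]]; do
        echo "still waiting..."
        sleep 5
    done
}

# export -f makes the function definition visible to child bash processes;
# timeout kills the child (and returns non-zero) if it runs longer than 60 seconds.
export -f wait_for_something
if timeout --foreground 60 bash -c wait_for_something; then
    echo "condition met within the deadline"
else
    echo "timed out waiting"
fi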