Skip to content

Commit 6ca5e0e

Browse files
committed
local-up-cluster kube-proxy terminated error
When using hack/local-up-cluster.sh to deploy a local cluster, it failed with the message "kube-proxy terminated unexpectedly" and "Failed to retrieve node info: nodes "127.0.0.1" not found" in kube-proxy.log. The root cause of this error is the wrong boot order of Kubernetes services in local-up-cluster.sh: kube-proxy and the kubelet daemon. When kube-proxy starts, it checks node information, and this information is collected by the kubelet daemon. However, in the shell script, the kube-proxy service started before the kubelet daemon. This patch changes the boot order of the kubelet daemon and kube-proxy, and checks that the node status is ready before kube-proxy starts. Signed-off-by: Howard Zhang <[email protected]>
1 parent 6cb788c commit 6ca5e0e

File tree

2 files changed

+40
-3
lines changed

2 files changed

+40
-3
lines changed

hack/lib/util.sh

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,24 @@ kube::util::wait_for_url() {
6464
return 1
6565
}
6666

67+
# Repeatedly evaluates a shell command until it succeeds or the wait budget
# is exhausted.
# Example: kube::util::wait_for_success 120 5 "kubectl get nodes|grep localhost"
# Arguments:
#   $1 - total time to wait (seconds)
#   $2 - sleep interval between attempts (seconds)
#   $3 - shell command to evaluate
# Returns: 0 as soon as the command succeeds, 1 if the budget runs out.
kube::util::wait_for_success(){
  local remaining="$1"
  local interval="$2"
  local command="$3"
  while (( remaining > 0 )); do
    # eval so the caller may pass a full pipeline as a single string
    if eval "${command}"; then
      return 0
    fi
    sleep "${interval}"
    remaining=$(( remaining - interval ))
  done
  return 1
}
84+
6785
# Example: kube::util::trap_add 'echo "in trap DEBUG"' DEBUG
6886
# See: http://stackoverflow.com/questions/3338030/multiple-bash-traps-for-the-same-signal
6987
kube::util::trap_add() {

hack/local-up-cluster.sh

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,19 @@ function start_cloud_controller_manager {
670670
export CLOUD_CTLRMGR_PID=$!
671671
}
672672

673+
# Waits for the kubelet to register the local node with the API server.
# Blocks until `kubectl get nodes` lists ${KUBELET_HOST}, polling every
# ${interval_time}s for up to ${system_node_wait_time}s; exits the script
# with status 1 on timeout.
# Globals read: KUBECTL, CERT_DIR, KUBELET_HOST
function wait_node_ready(){
  # check the nodes information after kubelet daemon start
  local nodes_stats="${KUBECTL} --kubeconfig '${CERT_DIR}/admin.kubeconfig' get nodes"
  local node_name="${KUBELET_HOST}"
  local system_node_wait_time=30
  local interval_time=2
  # Test the command's exit status directly instead of the fragile
  # `[ $? == "1" ]` pattern (non-portable `==` in `[ ]`, and it would miss
  # any non-1 failure code). Quote the grep pattern to avoid word splitting.
  if ! kube::util::wait_for_success "${system_node_wait_time}" "${interval_time}" "${nodes_stats} | grep '${node_name}'"; then
    echo "time out on waiting $node_name info" >&2
    exit 1
  fi
}
685+
673686
function start_kubelet {
674687
KUBELET_LOG=${LOG_DIR}/kubelet.log
675688
mkdir -p "${POD_MANIFEST_PATH}" &>/dev/null || sudo mkdir -p "${POD_MANIFEST_PATH}"
@@ -784,6 +797,10 @@ function start_kubelet {
784797
function start_kubeproxy {
785798
PROXY_LOG=${LOG_DIR}/kube-proxy.log
786799

800+
# wait for kubelet collect node information
801+
echo "wait kubelet ready"
802+
wait_node_ready
803+
787804
cat <<EOF > /tmp/kube-proxy.yaml
788805
apiVersion: kubeproxy.config.k8s.io/v1alpha1
789806
kind: KubeProxyConfiguration
@@ -997,9 +1014,6 @@ if [[ "${START_MODE}" != "kubeletonly" ]]; then
9971014
if [[ "${EXTERNAL_CLOUD_PROVIDER:-}" == "true" ]]; then
9981015
start_cloud_controller_manager
9991016
fi
1000-
if [[ "${START_MODE}" != "nokubeproxy" ]]; then
1001-
start_kubeproxy
1002-
fi
10031017
start_kubescheduler
10041018
start_kubedns
10051019
if [[ "${ENABLE_NODELOCAL_DNS:-}" == "true" ]]; then
@@ -1025,6 +1039,11 @@ if [[ "${START_MODE}" != "nokubelet" ]]; then
10251039
esac
10261040
fi
10271041

1042+
if [[ "${START_MODE}" != "kubeletonly" ]]; then
1043+
if [[ "${START_MODE}" != "nokubeproxy" ]]; then
1044+
start_kubeproxy
1045+
fi
1046+
fi
10281047
if [[ -n "${PSP_ADMISSION}" && "${AUTHORIZATION_MODE}" = *RBAC* ]]; then
10291048
create_psp_policy
10301049
fi

0 commit comments

Comments
 (0)