@@ -6,31 +6,212 @@ read -s -u 3 3< /var/lib/secrets/dbpassword MYSQL_PWD || true
# Publish the password read from the secrets file to child processes, so
# the mysql client tools can authenticate without it appearing in argv.
export MYSQL_PWD

# Database account used by every probe query
PROBE_USER=root
9- function mysql_status_check {
9+
# Files whose presence on disk tells how far the server startup has gone
MYSQL_SOCKET=/var/lib/mysql/mysql.sock
SST_IN_PROGRESS=/var/lib/mysql/sst_in_progress

# CHECK_RETRY:  number of attempts made by each retrying check
# CHECK_WAIT:   value handed to sleep between two attempts
# STARTUP_WAIT: value handed to sleep between two startup checks
CHECK_RETRY=10
CHECK_WAIT=0.5
STARTUP_WAIT=2
16+
# Most recent state message recorded by the probe (empty: nothing to report)
LAST_STATE=""

# Remember a state message for later reporting.
# Arguments: $1 - human readable description of the current state
# Globals:   LAST_STATE (written)
# Nothing is printed here: only the latest message is kept, so a state that
# repeats across retries never produces duplicate lines in the k8s events
# when the probe fails.
function log_state {
    local state=$1
    if [ "${LAST_STATE}" != "${state}" ]; then
        # store the message verbatim, without any padding
        LAST_STATE=${state}
    fi
}
26+
# Print the last recorded state message, if there is one.
# Globals: LAST_STATE (read)
# Outputs: the message on stdout; nothing when it is empty or only blanks,
#          so a probe that never recorded a state stays silent
function log_last_state {
    if [ -n "${LAST_STATE//[[:space:]]/}" ]; then
        printf '%s\n' "${LAST_STATE}"
    fi
}
# report the final state on every exit path of the probe
trap log_last_state EXIT
33+
# Query a server status variable, retrying while the client call fails.
# Arguments: $1 - name of the status variable (used as a LIKE pattern)
# Globals:   PROBE_USER, CHECK_RETRY, CHECK_WAIT (read)
# Outputs:   last line of the client output on stdout (the value);
#            after the final failed attempt, the client's error on stderr
# Returns:   0 when a query succeeded, 1 when every attempt failed
function get_mysql_status {
    local status=$1
    local attempt
    local out
    for ((attempt = 0; attempt < CHECK_RETRY; attempt++)); do
        # the pattern must be the bare variable name: any padding inside
        # the quotes would make LIKE match nothing
        if out=$(mysql -u"${PROBE_USER}" -sNEe "show status like '${status}';" 2>&1); then
            printf '%s\n' "${out}" | tail -n 1
            return 0
        fi
        sleep "${CHECK_WAIT}"
    done
    # if we pass here, log the last error from mysql
    printf '%s\n' "${out}" >&2
    return 1
}
51+
# Check that a server status variable holds the expected value.
# Arguments: $1 - name of the status variable
#            $2 - value the variable must have
# Returns:   0 when the value matches; 1 otherwise, after recording the
#            mismatch through log_state
function check_mysql_status {
    local status=$1
    local expect=$2
    local val

    # an unreachable server yields an empty value, which is reported as a
    # mismatch like any other unexpected value
    val=$(get_mysql_status "${status}")
    if [ "${val}" = "${expect}" ]; then
        return 0
    fi
    log_state "${status} (${val}) differs from ${expect}"
    return 1
}
1566
16- # Consider the pod has "started" once mysql is reachable
17- # and is part of the primary partition
18- if [ " $1 " = " startup" ]; then
19- mysql_status_check wsrep_cluster_status Primary
20- exit $?
21- fi
# Tell whether the SST marker file is present while the server socket
# is still absent.
# Globals: MYSQL_SOCKET, SST_IN_PROGRESS, CHECK_RETRY, CHECK_WAIT (read)
# Returns: 0 as soon as the SST marker exists without a socket;
#          1 as soon as the socket exists, or when neither file showed up
#          after all attempts
function check_sst_in_progress {
    local attempt
    # retry to give some time to mysql to set up the SST
    for ((attempt = 0; attempt < CHECK_RETRY; attempt++)); do
        # paths are quoted: an unquoted empty value would reduce the test
        # to `[ -e ]`, which is always true
        if [ -e "${MYSQL_SOCKET}" ]; then
            return 1
        fi
        if [ -e "${SST_IN_PROGRESS}" ]; then
            return 0
        fi
        sleep "${CHECK_WAIT}"
    done
    return 1
}
2281
# Tell whether the server socket exists and the server answers a ping.
# Globals: MYSQL_SOCKET, PROBE_USER, CHECK_RETRY, CHECK_WAIT (read)
# Returns: 0 as soon as the socket exists and `mysqladmin ping` succeeds;
#          1 when that never happened within the configured attempts
function check_mysql_ready {
    local attempt
    # retry to give some time to mysql to create its socket
    for ((attempt = 0; attempt < CHECK_RETRY; attempt++)); do
        # discard the ping output through the absolute /dev/null: a relative
        # path would depend on the probe's working directory
        if [ -e "${MYSQL_SOCKET}" ] \
            && mysqladmin -s -u"${PROBE_USER}" ping >/dev/null; then
            return 0
        fi
        sleep "${CHECK_WAIT}"
    done
    return 1
}
94+
# Monitor the startup sequence until the galera node is connected
# to a primary component and synced.
# NOTE: as of mariadb 10.5, if mysql connects to a non-primary
# partition, it never creates any socket and gets stuck indefinitely.
# In that case, in order to not wait until the startup times out
# (very long), we error out of the probe so that the pod can restart
# and mysql reconnect to a primary partition if possible.
#
# Globals: MYSQL_SOCKET (read; falls back to /var/lib/mysql/mysql.sock)
# Returns: 0 when startup is complete, 1 when the caller should check again
# Exits:   with status 1 (ending the whole probe, not just this function)
#          when the situation is considered unrecoverable
function check_mysql_startup {
    # mysql initialization sequence:
    #   . mysql connects to a remote galera node over port 4567
    #   . mysql optionally runs a SST (port 4444), SST marker created on disk
    #   . only at this point, InnoDB is initialized, mysql pidfile and
    #     mysql socket are created on disk
    local socket=${MYSQL_SOCKET:-/var/lib/mysql/mysql.sock}
    local connections
    local status
    local comment

    if pgrep -f detect_gcomm_and_start.sh >/dev/null; then
        log_state "waiting for gcomm URI"
        return 1
    fi
    # pidfile is not written on disk until mysql is ready,
    # so look for the mysqld process instead
    if ! pgrep -f /usr/libexec/mysqld >/dev/null; then
        log_state "waiting for mysql to start"
        return 1
    fi

    # a bootstrap node (empty gcomm:// address) must be reachable from the
    # CLI to finish startup
    if pgrep -f -- '--wsrep-cluster-address=gcomm://(\W|$)' >/dev/null; then
        check_mysql_ready
        return $?
    # a joiner node must have an established socket connection before
    # testing further
    elif pgrep -f -- '--wsrep-cluster-address=gcomm://\w' >/dev/null; then
        connections=$(ss -tnH state established src :4567 or dst :4567 | wc -l)
        # at least one connection is required; a count that is not a number
        # fails the test as well and is treated as "not connected yet"
        if ! [ "${connections}" -gt 0 ]; then
            log_state "waiting for mysql to join a galera cluster"
            return 1
        fi
    else
        log_state "could not determine galera startup mode"
        exit 1
    fi

    # a joiner node requires additional startup checks
    if [ -e "${socket}" ]; then
        # good case, mysql is ready to be probed from the CLI
        # check WSREP status like the regular liveness probe
        status=$(get_mysql_status wsrep_cluster_status)
        comment=$(get_mysql_status wsrep_local_state_comment)
        if [ "${status}" = "Primary" ] && [ "${comment}" = "Synced" ]; then
            return 0
        elif [ "${status}" = "Primary" ]; then
            log_state "waiting to be synced with the cluster"
            return 1
        # NOTE(review): confirm the exact spelling/case the server reports
        # for a non-primary component; this comparison is case sensitive
        elif [ "${status}" = "Non-primary" ] && [ "${comment}" = "Synced" ]; then
            log_state "mysql is connected to a non-primary partition, server stopped"
            exit 1
        else
            log_state "waiting for connection to a primary partition"
            return 1
        fi
    fi

    # if there is no socket, mysql may be running an SST...
    if check_sst_in_progress; then
        log_state "waiting for SST to finish"
        return 1
    fi

    # ... if no SST was detected, it may have finished before
    # we probed it. Check a last time whether we can connect to mysql
    if check_mysql_ready; then
        return 0
    fi

    # At this stage, mysql is either trying to connect to a bootstrap node
    # that resolved to an old pod IP, or it is connected to a
    # non-primary partition. Either way, this is not recoverable, so
    # make the probe fail and let k8s kill the mysql server.
    log_state "could not find a primary partition to connect to"
    exit 1
}
180+
181+
182+ # startup probe loops until the node started or joined a galera cluster
23183# readiness and liveness probes are run by k8s only after start probe succeeded
24184
25185case " $1 " in
186+ startup)
187+ if [ -z " $2 " ]; then
188+ echo " startup timeout option missing"
189+ exit 1
190+ fi
191+ TIME_TIMEOUT=$2
192+
193+ # Run the entire check in a single startup probe to avoid spurious
194+ # "Unhealthy" k8s events to be logged. The probe stops in error
195+ # if the startup timeout is reached
196+ rc=1
197+ while [ $rc -ne 0 ]; do
198+ if check_mysql_startup; then
199+ exit 0
200+ else
201+ sleep ${STARTUP_WAIT} ;
202+ [ $SECONDS -ge $TIME_TIMEOUT ] && exit 1
203+ fi
204+ done
205+ exit $rc
206+ ;;
26207 readiness)
27208 # If the node is e.g. a donor, it cannot serve traffic
28- mysql_status_check wsrep_local_state_comment Synced
209+ check_mysql_status wsrep_local_state_comment Synced
29210 ;;
30211 liveness)
31212 # If the node is not in the primary partition, the failed liveness probe
32213 # will make k8s restart this pod
33- mysql_status_check wsrep_cluster_status Primary
214+ check_mysql_status wsrep_cluster_status Primary
34215 ;;
35216 * )
36217 echo " Invalid probe option '$1 '"
0 commit comments