@@ -100,7 +100,8 @@ deploy_tls_cluster_secrets() {
100100}
101101
# Deploy the test client pod, overriding its container image with ${IMAGE_MYSQL}.
deploy_client() {
	local manifest="${TESTS_CONFIG_DIR}/client.yaml"
	local image_expr

	# Build the yq expression that pins .spec.containers[0].image to IMAGE_MYSQL.
	image_expr=$(printf '.spec.containers[0].image="%s"' "${IMAGE_MYSQL}")

	yq eval "${image_expr}" "${manifest}" \
		| kubectl -n "${NAMESPACE}" apply -f -
}
105106
106107apply_s3_storage_secrets () {
@@ -385,10 +386,29 @@ run_mysqlsh() {
385386 wait_pod $client_pod 1>&2
386387
387388 kubectl -n " ${NAMESPACE} " exec " ${pod:- mysql-client} " -- \
388- bash -c " printf '%s\n' \" ${command} \" | mysqlsh --sql --quiet-start=2 $uri " 2>&1 \
389+ bash -c " printf '%s\n' \" ${command} \" | mysqlsh --sql --quiet-start=2 $uri " 2> /dev/null \
389390 | tail -n +2
390391}
391392
# Print the InnoDB cluster status document as reported by
# `mysqlsh -- cluster status`, executed inside the client pod.
# Arguments:
#   $1 - mysqlsh connection URI (e.g. root:pass@host)
# Outputs: cluster status (JSON) on stdout.
get_innodb_cluster_status() {
	local uri="$1"

	# NOTE(review): client_pod is deliberately left global; sibling helpers
	# follow the same convention — confirm before making it local.
	client_pod=$(get_client_pod)
	wait_pod "${client_pod}" 1>&2

	# Quote the URI so credentials containing special characters survive
	# word-splitting/globbing.
	kubectl -n "${NAMESPACE}" exec "${client_pod}" -- mysqlsh --js --quiet-start=2 --uri "${uri}" -- cluster status
}
401+
# Block until the InnoDB cluster reports defaultReplicaSet.status == "OK".
# Arguments:
#   $1 - mysqlsh connection URI passed through to get_innodb_cluster_status
# Exits non-zero if the cluster does not become OK within ~300s.
wait_until_innodb_ok() {
	local uri="$1"

	local retry=0
	until [[ $(get_innodb_cluster_status "${uri}" | jq -r .defaultReplicaSet.status) == "OK" ]]; do
		# Bug fix: the retry counter was previously incremented but never
		# checked, so this loop could hang forever. Bail out after 60 * 5s,
		# matching the 300s budget of the chaos wait helpers.
		if [[ ${retry} -ge 60 ]]; then
			echo "Timeout (300s) exceeded while waiting for InnoDB cluster to be OK"
			exit 1
		fi
		sleep 5
		retry=$((retry + 1))
	done
}
411+
# Execute curl inside the mysql-client pod; every argument is appended to
# `curl -s -k`. Prints whatever curl prints.
run_curl() {
	local cmd="curl -s -k $*"

	kubectl -n "${NAMESPACE}" exec mysql-client -- bash -c "${cmd}"
}
@@ -397,6 +417,13 @@ get_innodb_cluster_name() {
397417 echo $( get_cluster_name) | tr -cd ' [^a-zA-Z0-9_]+'
398418}
399419
# Build a mysqlsh root URI that targets one specific mysql pod through the
# headless service "<cluster>-mysql" in the test namespace.
# Arguments:
#   $1 - pod name
get_mysqlsh_uri_for_pod() {
	local pod=$1
	local host="${pod}.$(get_cluster_name)-mysql.${NAMESPACE}"

	printf '%s\n' "root:root_password@${host}"
}
426+
400427get_mysqlsh_uri () {
401428 local idx=${1:- 0}
402429
@@ -409,7 +436,7 @@ get_gr_status() {
409436
410437 client_pod=$( get_client_pod)
411438
412- kubectl -n " ${NAMESPACE} " exec " ${pod:- mysql-client} " -- mysqlsh --uri $uri --cluster --result-format json -- cluster status \
439+ kubectl -n " ${NAMESPACE} " exec " ${pod:- mysql-client} " -- mysqlsh --js -- uri $uri --cluster --result-format json -- cluster status \
413440 | sed -e ' s/mysql: //' \
414441 | (grep -v ' Using a password on the command line interface can be insecure.' || :)
415442}
@@ -525,7 +552,7 @@ get_router_pods() {
# List MySQL user accounts, excluding internal/system accounts.
# Arguments:
#   $1 - extra connection arguments forwarded to run_mysql
# Outputs: one user name per line.
get_mysql_users() {
	local args=$1

	# Bug fix: escape the dot so the -E pattern matches the literal account
	# name "percona.telemetry" instead of "percona<any-char>telemetry".
	run_mysql "SELECT user FROM mysql.user" "${args}" | grep -vE "mysql|root|percona\.telemetry"
}
530557
531558get_service_ip () {
@@ -780,19 +807,14 @@ deploy_chaos_mesh() {
780807
781808 helm repo add chaos-mesh https://charts.chaos-mesh.org
782809 if [ -n " ${MINIKUBE} " ]; then
783- helm install chaos-mesh chaos-mesh/chaos-mesh --namespace=${NAMESPACE} --set chaosDaemon.runtime=docker --set dashboard.create=false --version ${CHAOS_MESH_VER} --wait
810+ helm install chaos-mesh chaos-mesh/chaos-mesh --namespace=${NAMESPACE} --set chaosDaemon.runtime=docker --set dashboard.create=false --version ${CHAOS_MESH_VER}
784811 else
785812 helm install chaos-mesh chaos-mesh/chaos-mesh --namespace=${NAMESPACE} --set chaosDaemon.runtime=containerd --set chaosDaemon.socketPath=/run/containerd/containerd.sock --set dashboard.create=false --version ${CHAOS_MESH_VER}
786813 fi
787814 if [[ -n $OPENSHIFT ]]; then
788815 oc adm policy add-scc-to-user privileged -z chaos-daemon --namespace=${NAMESPACE}
789816 fi
790-
791- echo " Waiting for chaos-mesh DaemonSet to be ready..."
792- until [ " $( kubectl get daemonset chaos-daemon -n ${NAMESPACE} -o jsonpath=' {.status.numberReady}' ) " = " $( kubectl get daemonset chaos-daemon -n ${NAMESPACE} -o jsonpath=' {.status.desiredNumberScheduled}' ) " ]; do
793- echo " Waiting for DaemonSet chaos-daemon..."
794- sleep 5
795- done
817+ sleep 10
796818}
797819
798820destroy_chaos_mesh () {
@@ -824,17 +846,17 @@ kill_pods() {
824846 local selector=$2
825847 local pod_label=$3
826848 local label_value=$4
827- local chaos_suffix =$5
849+ local chaos_name =$5
828850
829851 if [ " ${selector} " == " pod" ]; then
830852 yq eval '
831- .metadata.name = "chaos-pod-kill- ' ${chaos_suffix } ' " |
853+ .metadata.name = "' ${chaos_name } ' " |
832854 del(.spec.selector.pods.test-namespace) |
833855 .spec.selector.pods.' ${ns} ' [0] = "' ${pod_label} ' "' ${TESTS_CONFIG_DIR} /chaos-pod-kill.yml \
834856 | kubectl apply --namespace ${ns} -f -
835857 elif [ " ${selector} " == " label" ]; then
836858 yq eval '
837- .metadata.name = "chaos-kill-label- ' ${chaos_suffix } ' " |
859+ .metadata.name = "' ${chaos_name } ' " |
838860 .spec.mode = "all" |
839861 del(.spec.selector.pods) |
840862 .spec.selector.labelSelectors."' ${pod_label} ' " = "' ${label_value} ' "' ${TESTS_CONFIG_DIR} /chaos-pod-kill.yml \
@@ -846,10 +868,10 @@ kill_pods() {
846868failure_pod () {
847869 local ns=$1
848870 local pod=$2
849- local chaos_suffix =$3
871+ local chaos_name =$3
850872
851873 yq eval '
852- .metadata.name = "chaos-pod-failure- ' ${chaos_suffix } ' " |
874+ .metadata.name = "' ${chaos_name } ' " |
853875 del(.spec.selector.pods.test-namespace) |
854876 .spec.selector.pods.' ${ns} ' [0] = "' ${pod} ' "' ${TESTS_CONFIG_DIR} /chaos-pod-failure.yml \
855877 | kubectl apply --namespace ${ns} -f -
@@ -859,16 +881,133 @@ failure_pod() {
# Apply a chaos-mesh NetworkChaos experiment that induces network loss for a
# single pod, using ${TESTS_CONFIG_DIR}/chaos-network-loss.yml as the template.
# Arguments:
#   $1 - namespace of the target pod
#   $2 - target pod name
#   $3 - name for the NetworkChaos object
network_loss() {
	local ns=$1
	local pod=$2
	local chaos_name=$3

	# The yq expression alternates out of single quotes to splice
	# ${chaos_name}, ${ns} and ${pod} into the manifest before applying it;
	# the template's placeholder "test-namespace" selector is removed.
	yq eval '
		.metadata.name = "'${chaos_name}'" |
		del(.spec.selector.pods.test-namespace) |
		.spec.selector.pods.'${ns}'[0] = "'${pod}'"' ${TESTS_CONFIG_DIR}/chaos-network-loss.yml \
		| kubectl apply --namespace ${ns} -f -
	sleep 5 # give chaos-mesh a moment to pick up the new experiment
}
871893
# Wait until a chaos-mesh experiment records an Apply/Succeeded event.
# Arguments:
#   $1 - chaos type: kill | failure | full-cluster-crash | network
#   $2 - name of the chaos object
# Exits non-zero on an unknown chaos type or after ~300s without success.
wait_until_chaos_applied() {
	local chaos_type=$1
	local chaos_name=$2

	local resource
	case ${chaos_type} in
		"kill" | "failure" | "full-cluster-crash")
			resource=podchaos/${chaos_name}
			;;
		"network")
			resource=networkchaos/${chaos_name}
			;;
		*)
			# Bug fix: an unknown type previously left ${resource} empty and
			# the loop polled a bogus resource for 5 minutes; fail fast.
			echo "Unknown chaos type: ${chaos_type}"
			exit 1
			;;
	esac

	local retry=0
	local succeeded
	until [[ ${retry} == 30 ]]; do
		sleep 10
		retry=$((retry + 1))

		succeeded=$(kubectl -n "${NAMESPACE}" get ${resource} -o yaml \
			| yq '.status.experiment.containerRecords[].events[]
				| select(.operation == "Apply" and .type == "Succeeded")')

		if [[ -n ${succeeded} ]]; then
			return
		fi
	done

	echo "Timeout (300s) exceeded while waiting for chaos to be applied"
	exit 1
}
925+
# Wait until a chaos-mesh experiment records a Recover/Succeeded event
# (i.e. the transient chaos has been undone).
# Arguments:
#   $1 - chaos type: kill | failure | network
#   $2 - name of the chaos object
# Exits non-zero on an unknown chaos type or after ~300s without success.
wait_until_chaos_recovered() {
	local chaos_type=$1
	local chaos_name=$2

	local resource
	case ${chaos_type} in
		"kill" | "failure")
			resource=podchaos/${chaos_name}
			;;
		"network")
			resource=networkchaos/${chaos_name}
			;;
		*)
			# Bug fix: an unknown type previously left ${resource} empty and
			# the loop polled a bogus resource for 5 minutes; fail fast.
			echo "Unknown chaos type: ${chaos_type}"
			exit 1
			;;
	esac

	local retry=0
	local succeeded
	until [[ ${retry} == 30 ]]; do
		sleep 10
		retry=$((retry + 1))

		succeeded=$(kubectl -n "${NAMESPACE}" get ${resource} -o yaml \
			| yq '.status.experiment.containerRecords[].events[]
				| select(.operation == "Recover" and .type == "Succeeded")')

		if [[ -n ${succeeded} ]]; then
			return
		fi
	done

	echo "Timeout (300s) exceeded while waiting for chaos to be recovered"
	exit 1
}
957+
# Inject a chaos experiment against the current primary and verify that the
# cluster fails over to a new primary and fully recovers (3 ONLINE members).
# Arguments:
#   $1 - chaos type: kill | full-cluster-crash | failure | network
#   $2 - namespace
#   $3 - primary pod name observed before the failure is injected
# Exits non-zero if the primary did not change or the cluster is degraded.
check_primary_chaos() {
	local chaos_type=$1
	local ns=$2
	local primary_before_failure=$3

	local chaos_name
	case ${chaos_type} in
		"kill")
			chaos_name="chaos-pod-kill-primary"
			kill_pods "${ns}" "pod" "${primary_before_failure}" "" "${chaos_name}"
			;;
		"full-cluster-crash")
			chaos_name="chaos-kill-label-cluster-crash"
			kill_pods "${ns}" "label" "app.kubernetes.io/instance" "gr-self-healing" "${chaos_name}"
			;;
		"failure")
			chaos_name="chaos-pod-failure-primary"
			failure_pod "${ns}" "${primary_before_failure}" "${chaos_name}"
			;;
		"network")
			chaos_name="chaos-pod-network-loss-primary"
			network_loss "${ns}" "${primary_before_failure}" "${chaos_name}"
			;;
		*)
			# Robustness: previously an unknown type fell through with
			# ${chaos_name} unset; fail fast instead.
			echo "Unknown chaos type: ${chaos_type}"
			exit 1
			;;
	esac

	wait_until_chaos_applied "${chaos_type}" "${chaos_name}"
	# failure and network chaos are transient: wait for chaos-mesh to undo them.
	if [[ ${chaos_type} == "failure" || ${chaos_type} == "network" ]]; then
		wait_until_chaos_recovered "${chaos_type}" "${chaos_name}"
	fi

	wait_cluster_consistency_gr "$(get_cluster_name)" 3 3

	local primary_after_failure
	local uri
	primary_after_failure=$(get_primary_from_group_replication)
	uri=$(get_mysqlsh_uri_for_pod "${primary_after_failure}")
	wait_until_innodb_ok "${uri}"

	if [[ "${primary_before_failure}" == "${primary_after_failure}" ]]; then
		echo "primary pod was not killed! something went wrong."
		exit 1
	fi

	# Count ONLINE members; grep -c replaces the grep | wc -l pipeline.
	local online_members
	uri=$(get_mysqlsh_uri_for_pod "$(get_primary_from_group_replication)")
	online_members=$(get_innodb_cluster_status "${uri}" \
		| jq .defaultReplicaSet.topology[].status \
		| grep -c ONLINE)

	if [[ ${online_members} != 3 ]]; then
		echo "expected 3 online members, got ${online_members}"
		exit 1
	fi
}
1010+
8721011renew_certificate () {
8731012 certificate=" $1 "
8741013
0 commit comments