@@ -136,7 +136,8 @@ deploy_tls_cluster_secrets() {
136
136
}
137
137
138
138
# Deploy the mysql-client helper pod into the test namespace, overriding
# its container image with ${IMAGE_MYSQL} so the client version always
# matches the server image under test.
# Globals: IMAGE_MYSQL, TESTS_CONFIG_DIR, NAMESPACE
deploy_client() {
	yq eval "$(printf '.spec.containers[0].image="%s"' "${IMAGE_MYSQL}")" "${TESTS_CONFIG_DIR}/client.yaml" \
		| kubectl -n "${NAMESPACE}" apply -f -
}
141
142
142
143
apply_s3_storage_secrets () {
@@ -444,10 +445,29 @@ run_mysqlsh() {
444
445
wait_pod $client_pod 1>&2
445
446
446
447
kubectl -n " ${NAMESPACE} " exec " ${pod:- mysql-client} " -- \
447
- bash -c " printf '%s\n' \" ${command} \" | mysqlsh --sql --quiet-start=2 $uri " 2>&1 \
448
+ bash -c " printf '%s\n' \" ${command} \" | mysqlsh --sql --quiet-start=2 $uri " 2> /dev/null \
448
449
| tail -n +2
449
450
}
450
451
452
+ get_innodb_cluster_status () {
453
+ local uri=" $1 "
454
+
455
+ client_pod=$( get_client_pod)
456
+ wait_pod $client_pod 1>&2
457
+
458
+ kubectl -n " ${NAMESPACE} " exec " ${client_pod} " -- mysqlsh --js --quiet-start=2 --uri ${uri} -- cluster status
459
+ }
460
+
461
# Block until the InnoDB cluster behind the given URI reports
# .defaultReplicaSet.status == "OK", polling every 5 seconds.
# $1 - mysqlsh connection URI
# Fix: the original incremented `retry` but never checked it, so the loop
# could spin forever; now fails hard after 60 attempts (~300s), matching
# the timeout convention of the wait_until_chaos_* helpers.
wait_until_innodb_ok() {
	local uri="$1"

	local retry=0
	until [[ $(get_innodb_cluster_status "${uri}" | jq -r .defaultReplicaSet.status) == "OK" ]]; do
		if [[ ${retry} -ge 60 ]]; then
			echo "Timeout (300s) exceeded while waiting for cluster status OK"
			exit 1
		fi
		sleep 5
		retry=$((retry + 1))
	done
}
470
+
451
471
run_curl () {
452
472
kubectl -n " ${NAMESPACE} " exec mysql-client -- bash -c " curl -s -k $* "
453
473
}
@@ -456,6 +476,13 @@ get_innodb_cluster_name() {
456
476
echo $( get_cluster_name) | tr -cd ' [^a-zA-Z0-9_]+'
457
477
}
458
478
479
# Build a mysqlsh connection URI for one specific mysql pod, addressed
# through the cluster's headless service (<cluster>-mysql) in the test
# namespace.
# $1 - pod name
get_mysqlsh_uri_for_pod() {
	local pod=$1

	echo "root:root_password@${pod}.$(get_cluster_name)-mysql.${NAMESPACE}"
}
485
+
459
486
get_mysqlsh_uri () {
460
487
local idx=${1:- 0}
461
488
@@ -468,7 +495,7 @@ get_gr_status() {
468
495
469
496
client_pod=$( get_client_pod)
470
497
471
- kubectl -n " ${NAMESPACE} " exec " ${pod:- mysql-client} " -- mysqlsh --uri $uri --cluster --result-format json -- cluster status \
498
+ kubectl -n " ${NAMESPACE} " exec " ${pod:- mysql-client} " -- mysqlsh --js -- uri $uri --cluster --result-format json -- cluster status \
472
499
| sed -e ' s/mysql: //' \
473
500
| (grep -v ' Using a password on the command line interface can be insecure.' || :)
474
501
}
@@ -584,7 +611,7 @@ get_router_pods() {
584
611
# List MySQL user accounts, filtering out built-in system accounts
# (mysql.*, root) and the percona.telemetry account created by the
# operator, so tests only see user-created accounts.
# $1 - extra args/URI forwarded to run_mysql
get_mysql_users() {
	local args=$1

	run_mysql "SELECT user FROM mysql.user" "${args}" | grep -vE "mysql|root|percona.telemetry"
}
589
616
590
617
get_service_ip () {
@@ -840,19 +867,14 @@ deploy_chaos_mesh() {
840
867
841
868
helm repo add chaos-mesh https://charts.chaos-mesh.org
842
869
if [ -n " ${MINIKUBE} " ]; then
843
- helm install chaos-mesh chaos-mesh/chaos-mesh --namespace=${NAMESPACE} --set chaosDaemon.runtime=docker --set dashboard.create=false --version ${CHAOS_MESH_VER} --wait
870
+ helm install chaos-mesh chaos-mesh/chaos-mesh --namespace=${NAMESPACE} --set chaosDaemon.runtime=docker --set dashboard.create=false --version ${CHAOS_MESH_VER}
844
871
else
845
872
helm install chaos-mesh chaos-mesh/chaos-mesh --namespace=${NAMESPACE} --set chaosDaemon.runtime=containerd --set chaosDaemon.socketPath=/run/containerd/containerd.sock --set dashboard.create=false --version ${CHAOS_MESH_VER}
846
873
fi
847
874
if [[ -n $OPENSHIFT ]]; then
848
875
oc adm policy add-scc-to-user privileged -z chaos-daemon --namespace=${NAMESPACE}
849
876
fi
850
-
851
- echo " Waiting for chaos-mesh DaemonSet to be ready..."
852
- until [ " $( kubectl get daemonset chaos-daemon -n ${NAMESPACE} -o jsonpath=' {.status.numberReady}' ) " = " $( kubectl get daemonset chaos-daemon -n ${NAMESPACE} -o jsonpath=' {.status.desiredNumberScheduled}' ) " ]; do
853
- echo " Waiting for DaemonSet chaos-daemon..."
854
- sleep 5
855
- done
877
+ sleep 10
856
878
}
857
879
858
880
destroy_chaos_mesh () {
@@ -884,17 +906,17 @@ kill_pods() {
884
906
local selector=$2
885
907
local pod_label=$3
886
908
local label_value=$4
887
- local chaos_suffix =$5
909
+ local chaos_name =$5
888
910
889
911
if [ " ${selector} " == " pod" ]; then
890
912
yq eval '
891
- .metadata.name = "chaos-pod-kill- ' ${chaos_suffix } ' " |
913
+ .metadata.name = "' ${chaos_name } ' " |
892
914
del(.spec.selector.pods.test-namespace) |
893
915
.spec.selector.pods.' ${ns} ' [0] = "' ${pod_label} ' "' ${TESTS_CONFIG_DIR} /chaos-pod-kill.yml \
894
916
| kubectl apply --namespace ${ns} -f -
895
917
elif [ " ${selector} " == " label" ]; then
896
918
yq eval '
897
- .metadata.name = "chaos-kill-label- ' ${chaos_suffix } ' " |
919
+ .metadata.name = "' ${chaos_name } ' " |
898
920
.spec.mode = "all" |
899
921
del(.spec.selector.pods) |
900
922
.spec.selector.labelSelectors."' ${pod_label} ' " = "' ${label_value} ' "' ${TESTS_CONFIG_DIR} /chaos-pod-kill.yml \
@@ -906,10 +928,10 @@ kill_pods() {
906
928
failure_pod () {
907
929
local ns=$1
908
930
local pod=$2
909
- local chaos_suffix =$3
931
+ local chaos_name =$3
910
932
911
933
yq eval '
912
- .metadata.name = "chaos-pod-failure- ' ${chaos_suffix } ' " |
934
+ .metadata.name = "' ${chaos_name } ' " |
913
935
del(.spec.selector.pods.test-namespace) |
914
936
.spec.selector.pods.' ${ns} ' [0] = "' ${pod} ' "' ${TESTS_CONFIG_DIR} /chaos-pod-failure.yml \
915
937
| kubectl apply --namespace ${ns} -f -
@@ -919,16 +941,133 @@ failure_pod() {
919
941
# Apply a chaos-mesh NetworkChaos experiment that drops traffic for one pod.
# $1 - namespace, $2 - target pod, $3 - NetworkChaos resource name
# The template's placeholder selector (test-namespace) is removed and
# replaced with the real namespace/pod before applying.
network_loss() {
	local ns=$1
	local pod=$2
	local chaos_name=$3

	yq eval '
		.metadata.name = "'${chaos_name}'" |
		del(.spec.selector.pods.test-namespace) |
		.spec.selector.pods.'${ns}'[0] = "'${pod}'"' "${TESTS_CONFIG_DIR}/chaos-network-loss.yml" \
		| kubectl apply --namespace "${ns}" -f -
	# give chaos-mesh a moment to start injecting before callers proceed
	sleep 5
}
931
953
954
# Wait until chaos-mesh reports the experiment as applied: an event with
# operation=Apply and type=Succeeded appears in the resource status.
# $1 - chaos_type: kill|failure|full-cluster-crash (PodChaos) or network
#      (NetworkChaos)
# $2 - chaos_name: name of the chaos resource
# Polls every 10s, at most 30 times (~300s), then fails the test.
wait_until_chaos_applied() {
	local chaos_type=$1
	local chaos_name=$2

	local resource
	case ${chaos_type} in
		"kill" | "failure" | "full-cluster-crash")
			resource=podchaos/${chaos_name}
			;;
		"network")
			resource=networkchaos/${chaos_name}
			;;
	esac

	local retry=0
	local succeeded
	until [[ ${retry} == 30 ]]; do
		sleep 10
		retry=$((retry + 1))

		succeeded=$(kubectl -n "${NAMESPACE}" get "${resource}" -o yaml \
			| yq '.status.experiment.containerRecords[].events[]
				| select(.operation == "Apply" and .type == "Succeeded")')

		if [[ -n ${succeeded} ]]; then
			return
		fi
	done

	echo "Timeout (300s) exceeded while waiting for chaos to be applied"
	exit 1
}
985
+
986
# Wait until chaos-mesh reports the experiment as recovered (undone): an
# event with operation=Recover and type=Succeeded appears in the status.
# Only meaningful for temporary chaos types (failure, network).
# $1 - chaos_type: kill|failure (PodChaos) or network (NetworkChaos)
# $2 - chaos_name: name of the chaos resource
# Polls every 10s, at most 30 times (~300s), then fails the test.
wait_until_chaos_recovered() {
	local chaos_type=$1
	local chaos_name=$2

	local resource
	case ${chaos_type} in
		"kill" | "failure")
			resource=podchaos/${chaos_name}
			;;
		"network")
			resource=networkchaos/${chaos_name}
			;;
	esac

	local retry=0
	local succeeded
	until [[ ${retry} == 30 ]]; do
		sleep 10
		retry=$((retry + 1))

		succeeded=$(kubectl -n "${NAMESPACE}" get "${resource}" -o yaml \
			| yq '.status.experiment.containerRecords[].events[]
				| select(.operation == "Recover" and .type == "Succeeded")')

		if [[ -n ${succeeded} ]]; then
			return
		fi
	done

	echo "Timeout (300s) exceeded while waiting for chaos to be recovered"
	exit 1
}
1017
+
1018
# Run one chaos scenario against the current primary and verify the
# cluster self-heals: a different primary is elected, InnoDB cluster
# status returns to OK, and all 3 members report ONLINE.
# $1 - chaos_type: kill|full-cluster-crash|failure|network
# $2 - ns: namespace to run the chaos in
# $3 - primary_before_failure: pod name of the primary before chaos
# Exits 1 on any verification failure.
check_primary_chaos() {
	local chaos_type=$1
	local ns=$2
	local primary_before_failure=$3

	local chaos_name
	case ${chaos_type} in
		"kill")
			chaos_name="chaos-pod-kill-primary"
			kill_pods "${ns}" "pod" "${primary_before_failure}" "" "${chaos_name}"
			;;
		"full-cluster-crash")
			chaos_name="chaos-kill-label-cluster-crash"
			kill_pods "${ns}" "label" "app.kubernetes.io/instance" "gr-self-healing" "${chaos_name}"
			;;
		"failure")
			chaos_name="chaos-pod-failure-primary"
			failure_pod "${ns}" "${primary_before_failure}" "${chaos_name}"
			;;
		"network")
			chaos_name="chaos-pod-network-loss-primary"
			network_loss "${ns}" "${primary_before_failure}" "${chaos_name}"
			;;
	esac

	wait_until_chaos_applied "${chaos_type}" "${chaos_name}"
	# failure and network chaos are temporary: wait for chaos-mesh to undo
	# them before checking cluster recovery
	if [[ ${chaos_type} == "failure" || ${chaos_type} == "network" ]]; then
		wait_until_chaos_recovered "${chaos_type}" "${chaos_name}"
	fi

	wait_cluster_consistency_gr "$(get_cluster_name)" 3 3

	primary_after_failure=$(get_primary_from_group_replication)
	uri=$(get_mysqlsh_uri_for_pod "${primary_after_failure}")
	wait_until_innodb_ok "${uri}"

	if [[ "${primary_before_failure}" == "${primary_after_failure}" ]]; then
		echo "primary pod was not killed! something went wrong."
		exit 1
	fi

	# re-resolve the primary in case another failover happened meanwhile
	uri=$(get_mysqlsh_uri_for_pod "$(get_primary_from_group_replication)")
	online_members=$(get_innodb_cluster_status "${uri}" \
		| jq .defaultReplicaSet.topology[].status \
		| grep ONLINE \
		| wc -l)

	if [[ ${online_members} != 3 ]]; then
		echo "expected 3 online members, got ${online_members}"
		exit 1
	fi
}
1070
+
932
1071
renew_certificate () {
933
1072
certificate=" $1 "
934
1073
0 commit comments