
Commit 05fdd76

Merge branch 'main' into K8SPS-335
2 parents 14b92d7 + 81356af, commit 05fdd76

14 files changed: +183 −55 lines

e2e-tests/conf/client.yaml

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@ metadata:
   labels:
     name: mysql-client
 spec:
+  terminationGracePeriodSeconds: 10
   containers:
     - name: mysql-client
       image: percona/percona-server:8.0.33
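The added terminationGracePeriodSeconds: 10 lets the test client pod shut down quickly during namespace teardown instead of waiting out Kubernetes' 30-second default. A quick spot-check that the value landed on the running pod (a sketch, assuming the suite's ${NAMESPACE} is set):

    kubectl -n "${NAMESPACE}" get pod mysql-client \
        -o jsonpath='{.spec.terminationGracePeriodSeconds}'
    # expected output: 10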

e2e-tests/functions

Lines changed: 157 additions & 18 deletions
@@ -136,7 +136,8 @@ deploy_tls_cluster_secrets() {
 }

 deploy_client() {
-    kubectl -n "${NAMESPACE}" apply -f "${TESTS_CONFIG_DIR}/client.yaml"
+    yq eval "$(printf '.spec.containers[0].image="%s"' "${IMAGE_MYSQL}")" "${TESTS_CONFIG_DIR}/client.yaml" | \
+        kubectl -n "${NAMESPACE}" apply -f -
 }

 apply_s3_storage_secrets() {
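deploy_client no longer applies client.yaml verbatim: the yq expression rewrites .spec.containers[0].image to ${IMAGE_MYSQL} before piping the manifest to kubectl, so the client pod always runs the same server image as the cluster under test. A minimal sketch of the override outside the suite (the image tag here is hypothetical):

    IMAGE_MYSQL="percona/percona-server:8.0.36"  # hypothetical tag
    yq eval "$(printf '.spec.containers[0].image="%s"' "${IMAGE_MYSQL}")" client.yaml
    # prints client.yaml with the image field replaced; piping the result to
    # `kubectl apply -f -` applies the patched manifest without editing the file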
@@ -444,10 +445,29 @@ run_mysqlsh() {
     wait_pod $client_pod 1>&2

     kubectl -n "${NAMESPACE}" exec "${pod:-mysql-client}" -- \
-        bash -c "printf '%s\n' \"${command}\" | mysqlsh --sql --quiet-start=2 $uri" 2>&1 \
+        bash -c "printf '%s\n' \"${command}\" | mysqlsh --sql --quiet-start=2 $uri" 2>/dev/null \
         | tail -n +2
 }

+get_innodb_cluster_status() {
+    local uri="$1"
+
+    client_pod=$(get_client_pod)
+    wait_pod $client_pod 1>&2
+
+    kubectl -n "${NAMESPACE}" exec "${client_pod}" -- mysqlsh --js --quiet-start=2 --uri ${uri} -- cluster status
+}
+
+wait_until_innodb_ok() {
+    local uri="$1"
+
+    local retry=0
+    until [[ $(get_innodb_cluster_status ${uri} | jq -r .defaultReplicaSet.status) == "OK" ]]; do
+        sleep 5
+        retry=$((retry + 1))
+    done
+}
+
 run_curl() {
     kubectl -n "${NAMESPACE}" exec mysql-client -- bash -c "curl -s -k $*"
 }
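get_innodb_cluster_status shells into the client pod and runs mysqlsh's `cluster status` API call, which prints a JSON document; wait_until_innodb_ok polls that document until .defaultReplicaSet.status reads OK. A usage sketch (the pod and cluster names are hypothetical):

    uri="root:root_password@cluster1-mysql-0.cluster1-mysql.${NAMESPACE}"  # hypothetical pod
    wait_until_innodb_ok "${uri}"
    get_innodb_cluster_status "${uri}" | jq -r '.defaultReplicaSet.topology[].status'
    # a healthy three-member group prints "ONLINE" three times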
@@ -456,6 +476,13 @@ get_innodb_cluster_name() {
     echo $(get_cluster_name) | tr -cd '[^a-zA-Z0-9_]+'
 }

+get_mysqlsh_uri_for_pod() {
+    local pod=$1
+
+    echo "root:root_password@${pod}.$(get_cluster_name)-mysql.${NAMESPACE}"
+}
+
 get_mysqlsh_uri() {
     local idx=${1:-0}
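get_mysqlsh_uri_for_pod composes the per-pod URI from the pod name, the cluster's headless Service, and the namespace, so callers can target a specific member rather than index 0. For example (cluster name and namespace are hypothetical):

    get_mysqlsh_uri_for_pod "cluster1-mysql-2"
    # → root:root_password@cluster1-mysql-2.cluster1-mysql.kuttl-test-abcd
    # assuming get_cluster_name prints "cluster1" and NAMESPACE=kuttl-test-abcd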

@@ -468,7 +495,7 @@ get_gr_status() {

     client_pod=$(get_client_pod)

-    kubectl -n "${NAMESPACE}" exec "${pod:-mysql-client}" -- mysqlsh --uri $uri --cluster --result-format json -- cluster status \
+    kubectl -n "${NAMESPACE}" exec "${pod:-mysql-client}" -- mysqlsh --js --uri $uri --cluster --result-format json -- cluster status \
         | sed -e 's/mysql: //' \
         | (grep -v 'Using a password on the command line interface can be insecure.' || :)
 }
@@ -584,7 +611,7 @@ get_router_pods() {
 get_mysql_users() {
     local args=$1

-    run_mysql "SELECT user FROM mysql.user" "${args}" | grep -vE "mysql|root"
+    run_mysql "SELECT user FROM mysql.user" "${args}" | grep -vE "mysql|root|percona.telemetry"
 }

 get_service_ip() {
@@ -840,19 +867,14 @@ deploy_chaos_mesh() {

     helm repo add chaos-mesh https://charts.chaos-mesh.org
     if [ -n "${MINIKUBE}" ]; then
-        helm install chaos-mesh chaos-mesh/chaos-mesh --namespace=${NAMESPACE} --set chaosDaemon.runtime=docker --set dashboard.create=false --version ${CHAOS_MESH_VER} --wait
+        helm install chaos-mesh chaos-mesh/chaos-mesh --namespace=${NAMESPACE} --set chaosDaemon.runtime=docker --set dashboard.create=false --version ${CHAOS_MESH_VER}
     else
         helm install chaos-mesh chaos-mesh/chaos-mesh --namespace=${NAMESPACE} --set chaosDaemon.runtime=containerd --set chaosDaemon.socketPath=/run/containerd/containerd.sock --set dashboard.create=false --version ${CHAOS_MESH_VER}
     fi
     if [[ -n $OPENSHIFT ]]; then
         oc adm policy add-scc-to-user privileged -z chaos-daemon --namespace=${NAMESPACE}
     fi
-
-    echo "Waiting for chaos-mesh DaemonSet to be ready..."
-    until [ "$(kubectl get daemonset chaos-daemon -n ${NAMESPACE} -o jsonpath='{.status.numberReady}')" = "$(kubectl get daemonset chaos-daemon -n ${NAMESPACE} -o jsonpath='{.status.desiredNumberScheduled}')" ]; do
-        echo "Waiting for DaemonSet chaos-daemon..."
-        sleep 5
-    done
+    sleep 10
 }

 destroy_chaos_mesh() {
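The fixed readiness loop can go because wait_until_chaos_applied (added later in this diff) verifies that each experiment actually fired; the sleep 10 only gives the chaos-daemon pods a head start. If you still want to block until the DaemonSet is ready when debugging locally, a sketch:

    kubectl -n "${NAMESPACE}" rollout status daemonset/chaos-daemon --timeout=120s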
@@ -884,17 +906,17 @@ kill_pods() {
     local selector=$2
     local pod_label=$3
     local label_value=$4
-    local chaos_suffix=$5
+    local chaos_name=$5

     if [ "${selector}" == "pod" ]; then
         yq eval '
-            .metadata.name = "chaos-pod-kill-'${chaos_suffix}'" |
+            .metadata.name = "'${chaos_name}'" |
             del(.spec.selector.pods.test-namespace) |
             .spec.selector.pods.'${ns}'[0] = "'${pod_label}'"' ${TESTS_CONFIG_DIR}/chaos-pod-kill.yml \
             | kubectl apply --namespace ${ns} -f -
     elif [ "${selector}" == "label" ]; then
         yq eval '
-            .metadata.name = "chaos-kill-label-'${chaos_suffix}'" |
+            .metadata.name = "'${chaos_name}'" |
             .spec.mode = "all" |
             del(.spec.selector.pods) |
             .spec.selector.labelSelectors."'${pod_label}'" = "'${label_value}'"' ${TESTS_CONFIG_DIR}/chaos-pod-kill.yml \
@@ -906,10 +928,10 @@ kill_pods() {
 failure_pod() {
     local ns=$1
     local pod=$2
-    local chaos_suffix=$3
+    local chaos_name=$3

     yq eval '
-        .metadata.name = "chaos-pod-failure-'${chaos_suffix}'" |
+        .metadata.name = "'${chaos_name}'" |
         del(.spec.selector.pods.test-namespace) |
         .spec.selector.pods.'${ns}'[0] = "'${pod}'"' ${TESTS_CONFIG_DIR}/chaos-pod-failure.yml \
         | kubectl apply --namespace ${ns} -f -
@@ -919,16 +941,133 @@ failure_pod() {
 network_loss() {
     local ns=$1
     local pod=$2
-    local chaos_suffix=$3
+    local chaos_name=$3

     yq eval '
-        .metadata.name = "chaos-pod-network-loss-'${chaos_suffix}'" |
+        .metadata.name = "'${chaos_name}'" |
         del(.spec.selector.pods.test-namespace) |
         .spec.selector.pods.'${ns}'[0] = "'${pod}'"' ${TESTS_CONFIG_DIR}/chaos-network-loss.yml \
         | kubectl apply --namespace ${ns} -f -
     sleep 5
 }

+wait_until_chaos_applied() {
+    local chaos_type=$1
+    local chaos_name=$2
+
+    local resource
+    case ${chaos_type} in
+        "kill"|"failure"|"full-cluster-crash")
+            resource=podchaos/${chaos_name}
+            ;;
+        "network")
+            resource=networkchaos/${chaos_name}
+            ;;
+    esac
+
+    local retry=0
+    until [[ ${retry} == 30 ]]; do
+        sleep 10
+        retry=$((retry + 1))
+
+        succeeded=$(kubectl -n ${NAMESPACE} get ${resource} -o yaml \
+            | yq '.status.experiment.containerRecords[].events[]
+                | select(.operation == "Apply" and .type == "Succeeded")')
+
+        if [[ -n ${succeeded} ]]; then
+            return
+        fi
+    done
+
+    echo "Timeout (300s) exceeded while waiting for chaos to be applied"
+    exit 1
+}
+
+wait_until_chaos_recovered() {
+    local chaos_type=$1
+    local chaos_name=$2
+
+    local resource
+    case ${chaos_type} in
+        "kill"|"failure")
+            resource=podchaos/${chaos_name}
+            ;;
+        "network")
+            resource=networkchaos/${chaos_name}
+            ;;
+    esac
+
+    local retry=0
+    until [[ ${retry} == 30 ]]; do
+        sleep 10
+        retry=$((retry + 1))
+
+        succeeded=$(kubectl -n ${NAMESPACE} get ${resource} -o yaml \
+            | yq '.status.experiment.containerRecords[].events[]
+                | select(.operation == "Recover" and .type == "Succeeded")')
+
+        if [[ -n ${succeeded} ]]; then
+            return
+        fi
+    done
+
+    echo "Timeout (300s) exceeded while waiting for chaos to be recovered"
+    exit 1
+}
+
+check_primary_chaos() {
+    local chaos_type=$1
+    local ns=$2
+    local primary_before_failure=$3
+
+    local chaos_name
+    case ${chaos_type} in
+        "kill")
+            chaos_name="chaos-pod-kill-primary"
+            kill_pods "${ns}" "pod" "${primary_before_failure}" "" "${chaos_name}"
+            ;;
+        "full-cluster-crash")
+            chaos_name="chaos-kill-label-cluster-crash"
+            kill_pods "${ns}" "label" "app.kubernetes.io/instance" "gr-self-healing" "${chaos_name}"
+            ;;
+        "failure")
+            chaos_name="chaos-pod-failure-primary"
+            failure_pod "${ns}" "${primary_before_failure}" "${chaos_name}"
+            ;;
+        "network")
+            chaos_name="chaos-pod-network-loss-primary"
+            network_loss "${ns}" "${primary_before_failure}" "${chaos_name}"
+            ;;
+    esac
+
+    wait_until_chaos_applied ${chaos_type} ${chaos_name}
+    if [[ ${chaos_type} == "failure" || ${chaos_type} == "network" ]]; then
+        wait_until_chaos_recovered ${chaos_type} ${chaos_name}
+    fi
+
+    wait_cluster_consistency_gr "$(get_cluster_name)" 3 3
+
+    primary_after_failure=$(get_primary_from_group_replication)
+    uri=$(get_mysqlsh_uri_for_pod ${primary_after_failure})
+    wait_until_innodb_ok ${uri}
+
+    if [[ "${primary_before_failure}" == "${primary_after_failure}" ]]; then
+        echo "primary pod was not killed! something went wrong."
+        exit 1
+    fi
+
+    uri=$(get_mysqlsh_uri_for_pod $(get_primary_from_group_replication))
+    online_members=$(get_innodb_cluster_status ${uri} \
+        | jq .defaultReplicaSet.topology[].status \
+        | grep ONLINE \
+        | wc -l)
+
+    if [[ ${online_members} != 3 ]]; then
+        echo "expected 3 online members, got ${online_members}"
+        exit 1
+    fi
+}
+
 renew_certificate() {
     certificate="$1"
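wait_until_chaos_applied keys off Chaos Mesh's own status records rather than a fixed sleep: each PodChaos or NetworkChaos object logs an event per container record once the fault is injected (operation "Apply") and once it is rolled back ("Recover"). The same query can be run by hand to spot-check an experiment (the object name here follows the check_primary_chaos naming):

    kubectl -n "${NAMESPACE}" get podchaos/chaos-pod-kill-primary -o yaml \
        | yq '.status.experiment.containerRecords[].events[]
            | select(.operation == "Apply" and .type == "Succeeded")'
    # non-empty output means the kill was actually applied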

Lines changed: 4 additions & 10 deletions
@@ -1,18 +1,12 @@
 apiVersion: kuttl.dev/v1beta1
 kind: TestStep
-timeout: 30
+
 commands:
-  - script: |-
+  - timeout: 720
+    script: |-
       set -o errexit
       set -o xtrace

       source ../../functions

-      init_pod="$(get_primary_from_group_replication)"
-      kill_pods "${NAMESPACE}" "pod" "$init_pod" "" "primary"
-      sleep 20 # wait a bit for pod to be killed
-
-      if [ "$init_pod" == "$(get_primary_from_group_replication)" ]; then
-          echo "primary pod was not killed! something went wrong."
-          exit 1
-      fi
+      check_primary_chaos "kill" ${NAMESPACE} $(get_primary_from_group_replication)
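The inline kill-and-verify logic is gone: check_primary_chaos now drives the whole scenario (inject the fault, wait for Chaos Mesh to confirm it, wait for cluster consistency, then assert a new primary and three ONLINE members), and the step-level timeout moves to a per-command timeout: 720. The helper can also be invoked by hand (a sketch; the namespace value is made up and the sourced file expects the suite's environment):

    source e2e-tests/functions
    NAMESPACE="kuttl-test-abcd"  # hypothetical namespace
    check_primary_chaos "kill" "${NAMESPACE}" "$(get_primary_from_group_replication)"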
Lines changed: 3 additions & 4 deletions
@@ -1,12 +1,11 @@
 apiVersion: kuttl.dev/v1beta1
 kind: TestStep
-timeout: 30
 commands:
-  - script: |-
+  - timeout: 720
+    script: |-
       set -o errexit
       set -o xtrace

       source ../../functions

-      failure_pod "${NAMESPACE}" "$(get_primary_from_group_replication)" "primary"
-      sleep 10 # wait a bit for pod to be killed
+      check_primary_chaos "failure" ${NAMESPACE} $(get_primary_from_group_replication)
Lines changed: 3 additions & 5 deletions
@@ -1,13 +1,11 @@
 apiVersion: kuttl.dev/v1beta1
 kind: TestStep
-timeout: 90
 commands:
-  - script: |-
+  - timeout: 720
+    script: |-
       set -o errexit
       set -o xtrace

       source ../../functions

-      network_loss "${NAMESPACE}" "$(get_primary_from_group_replication)" "primary"
-      sleep 30 # wait for new master to get elected
-    timeout: 90
+      check_primary_chaos "network" ${NAMESPACE} $(get_primary_from_group_replication)

e2e-tests/tests/gr-self-healing/12-write-data.yaml

Lines changed: 0 additions & 1 deletion
@@ -10,4 +10,3 @@ commands:
       run_mysql \
           "INSERT myDB.myTable (id) VALUES (100503)" \
           "-h $(get_mysql_router_service $(get_cluster_name)) -P 6446 -uroot -proot_password"
-      sleep 10

e2e-tests/tests/gr-self-healing/13-assert.yaml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 apiVersion: kuttl.dev/v1beta1
 kind: TestAssert
-timeout: 240
+timeout: 30
 ---
 apiVersion: ps.percona.com/v1alpha1
 kind: PerconaServerMySQL

e2e-tests/tests/gr-self-healing/13-read-from-replicas.yaml

Lines changed: 0 additions & 2 deletions
@@ -1,6 +1,5 @@
 apiVersion: kuttl.dev/v1beta1
 kind: TestStep
-timeout: 120
 commands:
   - script: |-
       set -o errexit
@@ -13,4 +12,3 @@ commands:
           data=$(run_mysql "SELECT * FROM myDB.myTable" "-h ${host} -uroot -proot_password")
           kubectl create configmap -n "${NAMESPACE}" 13-read-from-replicas-${i} --from-literal=data="${data}"
       done
-      sleep 20
Lines changed: 3 additions & 4 deletions
@@ -1,12 +1,11 @@
 apiVersion: kuttl.dev/v1beta1
 kind: TestStep
 commands:
-  - script: |-
+  - timeout: 720
+    script: |-
       set -o errexit
       set -o xtrace

       source ../../functions

-      kill_pods "${NAMESPACE}" "label" "app.kubernetes.io/instance" "gr-self-healing" "cluster-crash"
-      sleep 30 # wait for crash
-    timeout: 100
+      check_primary_chaos "full-cluster-crash" ${NAMESPACE} $(get_primary_from_group_replication)

e2e-tests/tests/gr-self-healing/17-quorum-loss.yaml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 apiVersion: kuttl.dev/v1beta1
 kind: TestStep
-timeout: 480
+timeout: 30
 commands:
   - script: |-
       set -o errexit
