Commit e622a56
K8SPSMDB-1212: Disable balancer during backups/restores
1 parent 4e0b70e

23 files changed: +381 -178 lines changed
Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+apiVersion: psmdb.percona.com/v1
+kind: PerconaServerMongoDBBackup
+metadata:
+  finalizers:
+    - percona.com/delete-backup
+  name: backup-minio
+spec:
+  clusterName: some-name
+  storageName: minio
Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+apiVersion: psmdb.percona.com/v1
+kind: PerconaServerMongoDBRestore
+metadata:
+  name:
+spec:
+  clusterName: some-name
+  backupName:

e2e-tests/balancer/conf/some-name-rs0.yml

Lines changed: 11 additions & 1 deletion
@@ -7,7 +7,17 @@ spec:
   image:
   imagePullPolicy: Always
   backup:
-    enabled: false
+    enabled: true
+    image: perconalab/percona-server-mongodb-operator:1.1.0-backup
+    storages:
+      minio:
+        type: s3
+        s3:
+          credentialsSecret: minio-secret
+          region: us-east-1
+          bucket: operator-testing
+          endpointUrl: http://minio-service:9000/
+          insecureSkipTLSVerify: false
   sharding:
     enabled: true
     configsvrReplSet:

e2e-tests/balancer/run

Lines changed: 49 additions & 45 deletions
@@ -1,58 +1,70 @@
 #!/bin/bash
 
 set -o errexit
-set -o xtrace
 
 test_dir=$(realpath "$(dirname "$0")")
 . "${test_dir}/../functions"
 set_debug
 
 check_balancer() {
-	local expected=$1 # should be "full" (running balancer) or "off" (disabled balancer)
-
+	local cluster=$1
+	local expected=$2 # should be "full" (running balancer) or "off" (disabled balancer)
+	local delay=${3:-"0"}
 	local balancer_running
+
+	echo "sleeping for ${delay} seconds..."
+	sleep ${delay}
+
 	balancer_running=$(run_mongos 'db.adminCommand({balancerStatus: 1}).mode' "clusterAdmin:clusterAdmin123456@$cluster-mongos.$namespace" \
 		| grep -E -v "Percona Server for MongoDB|connecting to:|Implicit session:|versions do not match|Error saving history file:|bye")
 
+	echo -n "checking if balancer status is ${expected}..."
 	if [[ $balancer_running != "$expected" ]]; then
+		echo
 		echo "Unexpected output from \"db.adminCommand({balancerStatus: 1}).mode\": $balancer_running"
 		echo "Expected $expected"
 		exit 1
 	fi
+	echo "OK"
 }
 
-check_service() {
-	state=$1
-	svc_name=$2
-	if [ $state = "present" ]; then
-		echo -n "check that $svc_name was created"
-		local timeout=0
-		until kubectl_bin get service/$svc_name -o 'jsonpath={.spec.type}' 2>&1 | grep -vq NotFound; do
-			sleep 1
-			timeout=$((timeout + 1))
-			echo -n '.'
-			if [[ ${timeout} -gt 900 ]]; then
-				echo "Waiting timeout has been reached. Service $svc_name is not present. Exiting..."
-				exit 1
-			fi
-		done
-		echo ".OK"
-	elif [ $state = "removed" ]; then
-		echo -n "check that $svc_name was removed"
-		if [[ -z $(kubectl_bin get service/$svc_name -o 'jsonpath={.spec.type}' 2>&1 | grep NotFound) ]]; then
-			echo "$svc_name was not removed."
-			exit 1
-		else
-			echo ".OK"
-		fi
-	else
-		echo "unknown state $state"
-	fi
+check_backup_and_restore() {
+	local cluster=$1
+	local backup_suffix=$2
+	local balancer_end_state=$3
+	local backup_name="backup-minio-${backup_suffix}"
+
+	echo "running backup: ${backup_name}"
+	run_backup "minio" "${backup_name}"
+	wait_backup "${backup_name}" "requested"
+
+	echo "checking if balancer is disabled"
+	check_balancer ${cluster} "off"
+
+	wait_backup "${backup_name}" "ready"
+
+	echo "checking if balancer is ${balancer_end_state} after backup"
+	check_balancer ${cluster} ${balancer_end_state} 10
+
+	echo "running restore: restore-${backup_name}"
+	run_restore "${backup_name}"
+	wait_restore ${backup_name} ${cluster} "requested"
+
+	echo "checking if balancer is disabled"
+	check_balancer ${cluster} "off"
+
+	wait_restore ${backup_name} ${cluster} "ready"
+
+	echo "checking if balancer is ${balancer_end_state} after restore"
+	check_balancer ${cluster} ${balancer_end_state} 10
 }
 
 main() {
 	create_infra "$namespace"
 
+	deploy_minio
+	apply_s3_storage_secrets
+
 	desc 'create first PSMDB cluster'
 	cluster="some-name"
 	kubectl_bin apply \
@@ -73,27 +85,19 @@ main() {
 	wait_for_running $cluster-rs0 3
 	wait_for_running $cluster-cfg 3 "false"
 	wait_for_running $cluster-mongos 3
-	sleep 20
-	check_balancer "full"
+	check_balancer ${cluster} "full" 10
+
+	check_backup_and_restore ${cluster} "0" "full"
 
 	desc 'disabling balancer'
 	kubectl patch psmdb some-name --type=merge -p '{"spec":{"sharding":{"balancer":{"enabled":false}}}}'
-	sleep 20
-	check_balancer "off"
+	check_balancer ${cluster} "off" 10
+
+	check_backup_and_restore ${cluster} "1" "off"
 
 	desc 'enabling balancer'
 	kubectl patch psmdb some-name --type=merge -p '{"spec":{"sharding":{"balancer":{"enabled":true}}}}'
-	sleep 20
-	check_balancer "full"
-
-	# Add check that servicePerPod creates 3 services for the running cluster
-	desc 'enabling servicePerPod for mongos'
-	kubectl patch psmdb some-name --type=merge -p '{"spec":{"sharding":{"mongos":{"expose":{"servicePerPod":true}}}}}'
-	wait_for_running $cluster-mongos 3
-	check_service present $cluster-mongos-0
-	check_service present $cluster-mongos-1
-	check_service present $cluster-mongos-2
-	check_service removed $cluster-mongos
+	check_balancer ${cluster} "full" 10
 
 	destroy "$namespace"
 }
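Taken together, the test pins down the invariant this commit introduces: while a backup or restore is in flight the balancer must report "off", and once it finishes the balancer returns to whatever the cluster spec requests. A minimal Go sketch of that decision rule, with hypothetical names (desiredBalancerMode is not a function in the operator):

package main

import "fmt"

// desiredBalancerMode sketches the rule the test verifies: a running
// backup or restore forces the balancer off; otherwise the mode follows
// the cluster spec. Hypothetical helper, not the operator's actual code.
func desiredBalancerMode(specEnabled, backupOrRestoreRunning bool) string {
	if backupOrRestoreRunning || !specEnabled {
		return "off"
	}
	return "full"
}

func main() {
	fmt.Println(desiredBalancerMode(true, true))   // off: backup/restore in flight
	fmt.Println(desiredBalancerMode(true, false))  // full: spec on, nothing running
	fmt.Println(desiredBalancerMode(false, false)) // off: spec disables the balancer
}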

e2e-tests/functions

Lines changed: 4 additions & 3 deletions
@@ -213,12 +213,13 @@ wait_backup_agent() {
 
 wait_backup() {
 	local backup_name=$1
+	local target_state=${2:-"ready"}
 
 	set +o xtrace
 	retry=0
-	echo -n $backup_name
+	echo -n "waiting for ${backup_name} to reach ${target_state} state"
 	local current_status=
-	until [[ ${current_status} == "ready" ]]; do
+	until [[ ${current_status} == ${target_state} ]]; do
 		sleep 1
 		echo -n .
 		let retry+=1
@@ -340,7 +341,7 @@ wait_restore() {
 
 	set +o xtrace
 	retry=0
-	echo -n "waiting psmdb-restore/${backup_name} to reach ${target_state} state"
+	echo -n "waiting psmdb-restore/restore-${backup_name} to reach ${target_state} state"
 	local current_state=
 	until [[ ${current_state} == ${target_state} ]]; do
 		sleep 1
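wait_backup now polls for an arbitrary target state instead of hard-coding "ready", which is what lets the balancer test catch the intermediate "requested" phase. A rough Go equivalent of this poll-until-state loop, assuming a caller-supplied getState callback (nothing here comes from the repo):

package main

import (
	"fmt"
	"time"
)

// waitForState polls getState once per second until it reports the
// target state or the retry budget is exhausted, mirroring the bash
// helper's loop. getState is a hypothetical callback.
func waitForState(target string, retries int, getState func() string) error {
	for i := 0; i < retries; i++ {
		if getState() == target {
			return nil
		}
		time.Sleep(time.Second)
	}
	return fmt.Errorf("timed out waiting for state %q", target)
}

func main() {
	// Mock a backup that walks through its states one poll at a time.
	states := []string{"new", "requested", "running", "ready"}
	i := 0
	err := waitForState("requested", 10, func() string {
		s := states[i]
		if i < len(states)-1 {
			i++
		}
		return s
	})
	fmt.Println(err) // <nil>: the mock reaches "requested" on the second poll
}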
Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
+apiVersion: psmdb.percona.com/v1
+kind: PerconaServerMongoDB
+metadata:
+  name: some-name
+spec:
+  #platform: openshift
+  image:
+  imagePullPolicy: Always
+  backup:
+    enabled: false
+  sharding:
+    enabled: true
+    configsvrReplSet:
+      size: 3
+      volumeSpec:
+        persistentVolumeClaim:
+          resources:
+            requests:
+              storage: 3Gi
+      expose:
+        enabled: false
+
+    mongos:
+      size: 3
+      configuration: |
+        replication:
+          localPingThresholdMs: 15
+      expose:
+        type: ClusterIP
+
+  replsets:
+    - name: rs0
+      affinity:
+        antiAffinityTopologyKey: none
+      expose:
+        enabled: false
+      resources:
+        limits:
+          cpu: 500m
+          memory: 1G
+        requests:
+          cpu: 100m
+          memory: 0.1G
+      volumeSpec:
+        persistentVolumeClaim:
+          resources:
+            requests:
+              storage: 1Gi
+      size: 3
+  secrets:
+    users: some-users

e2e-tests/service-per-pod/run

Lines changed: 47 additions & 2 deletions
@@ -9,7 +9,7 @@ set_debug
 check_cr_config() {
 	local cluster="$1"
 
-	desc "create first PSMDB cluster $cluster"
+	desc "create PSMDB cluster $cluster"
 	apply_cluster $test_dir/conf/$cluster.yml
 
 	desc 'check if all 3 Pods started'
@@ -40,8 +40,8 @@ check_cr_config() {
 	compare_mongo_cmd "find" "myApp:myPass@$(get_service_ip $cluster-1)" "" ":27017"
 	compare_mongo_cmd "find" "myApp:myPass@$(get_service_ip $cluster-2)" "" ":27017"
 
-	desc 'add service-per-pod lebel and annotation'
 	if [[ $cluster == "node-port-rs0" ]]; then
+		desc 'add service-per-pod label and annotation'
 		old_node_port=$(kubectl_bin get svc node-port-rs0-0 -o 'jsonpath={.spec.ports[0].nodePort}')
 		kubectl_bin patch psmdb node-port --type=json --patch '[
 			{
@@ -74,6 +74,36 @@ check_cr_config() {
 		-f $test_dir/conf/$cluster.yml
 }
 
+check_service() {
+	state=$1
+	svc_name=$2
+	if [ $state = "present" ]; then
+		echo -n "check that $svc_name was created"
+		local timeout=0
+		until kubectl_bin get service/$svc_name -o 'jsonpath={.spec.type}' 2>&1 | grep -vq NotFound; do
+			sleep 1
+			timeout=$((timeout + 1))
+			echo -n '.'
+			if [[ ${timeout} -gt 900 ]]; then
+				echo "Waiting timeout has been reached. Service $svc_name is not present. Exiting..."
+				exit 1
+			fi
+		done
+		echo ".OK"
+	elif [ $state = "removed" ]; then
+		echo -n "check that $svc_name was removed"
+		if [[ -z $(kubectl_bin get service/$svc_name -o 'jsonpath={.spec.type}' 2>&1 | grep NotFound) ]]; then
+			echo "$svc_name was not removed."
+			exit 1
+		else
+			echo ".OK"
+		fi
+	else
+		echo "unknown state $state"
+	fi
+}
+
+
 main() {
 	create_infra $namespace
 	deploy_cert_manager
@@ -92,6 +122,21 @@ main() {
 	desc 'check NodePort'
 	check_cr_config "node-port-rs0"
 
+	desc 'check Mongos in sharded cluster'
+	local cluster=some-name
+	apply_cluster "$test_dir/conf/sharded.yml"
+	wait_for_running $cluster-rs0 3
+	wait_for_running $cluster-cfg 3 "false"
+	wait_for_running $cluster-mongos 3
+
+	desc 'enabling servicePerPod for mongos'
+	kubectl patch psmdb some-name --type=merge -p '{"spec":{"sharding":{"mongos":{"expose":{"servicePerPod":true}}}}}'
+	wait_for_running $cluster-mongos 3
+	check_service present $cluster-mongos-0
+	check_service present $cluster-mongos-1
+	check_service present $cluster-mongos-2
+	check_service removed $cluster-mongos
+
 	destroy $namespace
 
 	desc 'test passed'

pkg/apis/psmdb/v1/psmdb_types.go

Lines changed: 4 additions & 0 deletions
@@ -217,6 +217,10 @@ type BalancerSpec struct {
 	Enabled *bool `json:"enabled,omitempty"`
 }
 
+func (b *BalancerSpec) IsEnabled() bool {
+	return b.Enabled == nil || *b.Enabled
+}
+
 type UpgradeOptions struct {
 	VersionServiceEndpoint string `json:"versionServiceEndpoint,omitempty"`
 	Apply UpgradeStrategy `json:"apply,omitempty"`
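IsEnabled encodes a common Kubernetes API pattern: an optional *bool where nil means "use the default", which here defaults to enabled. A self-contained sketch of the same pattern (standalone types, not the operator's package):

package main

import "fmt"

// BalancerSpec mirrors the operator's field: a *bool so that
// "unset" (nil) is distinguishable from an explicit false.
type BalancerSpec struct {
	Enabled *bool
}

// IsEnabled treats a nil pointer as "enabled" (the default).
func (b *BalancerSpec) IsEnabled() bool {
	return b.Enabled == nil || *b.Enabled
}

func main() {
	off := false
	fmt.Println((&BalancerSpec{}).IsEnabled())              // true: unset defaults to on
	fmt.Println((&BalancerSpec{Enabled: &off}).IsEnabled()) // false: explicitly disabled
}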

pkg/controller/perconaservermongodb/backup.go

Lines changed: 8 additions & 3 deletions
@@ -278,10 +278,15 @@ func (r *ReconcilePerconaServerMongoDB) isRestoreRunning(ctx context.Context, cr
 	}
 
 	for _, rst := range restores.Items {
-		if rst.Status.State != api.RestoreStateReady && rst.Status.State != api.RestoreStateNew && rst.Status.State != api.RestoreStateError &&
-			rst.Spec.ClusterName == cr.Name {
-			return true, nil
+		if rst.Spec.ClusterName != cr.Name {
+			continue
+		}
+
+		if rst.Status.State == api.RestoreStateReady || rst.Status.State == api.RestoreStateError {
+			continue
 		}
+
+		return true, nil
 	}
 
 	return false, nil