Skip to content

Commit fa5ef87

Browse files
authored
Merge pull request ceph#54802 from ajarr/wip-61617
qa: Add tests to validate synced images on rbd-mirror Reviewed-by: Ilya Dryomov <[email protected]>
2 parents c72704b + b7aae5c commit fa5ef87

7 files changed

+347
-16
lines changed
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# QA job fragment (krbd variant): adds the `pv` system package to the ceph
# install overrides (presumably for the rate-limited untar workload of the
# workunit -- confirm), then runs rbd/compare_mirror_image_alternate_primary.sh
# as a workunit on cluster1.client.mirror with RBD_DEVICE_TYPE='krbd' and
# `timeout: 3h`.
overrides:
2+
install:
3+
ceph:
4+
extra_system_packages:
5+
- pv
6+
tasks:
7+
- workunit:
8+
clients:
9+
cluster1.client.mirror:
10+
- rbd/compare_mirror_image_alternate_primary.sh
11+
env:
12+
RBD_DEVICE_TYPE: 'krbd'
13+
timeout: 3h
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# QA job fragment (nbd variant): adds the `rbd-nbd` package (presumably needed
# for `rbd device map -t nbd` -- confirm) and the `pv` system package to the
# ceph install overrides, then runs
# rbd/compare_mirror_image_alternate_primary.sh as a workunit on
# cluster1.client.mirror with RBD_DEVICE_TYPE='nbd' and `timeout: 3h`.
overrides:
2+
install:
3+
ceph:
4+
extra_packages:
5+
- rbd-nbd
6+
extra_system_packages:
7+
- pv
8+
tasks:
9+
- workunit:
10+
clients:
11+
cluster1.client.mirror:
12+
- rbd/compare_mirror_image_alternate_primary.sh
13+
env:
14+
RBD_DEVICE_TYPE: 'nbd'
15+
timeout: 3h
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# QA job fragment (krbd variant): adds the `pv` system package to the ceph
# install overrides (presumably for the rate-limited untar workload of the
# workunit -- confirm), then runs rbd/compare_mirror_images.sh as a workunit
# on cluster1.client.mirror with RBD_DEVICE_TYPE='krbd' and `timeout: 3h`.
overrides:
2+
install:
3+
ceph:
4+
extra_system_packages:
5+
- pv
6+
tasks:
7+
- workunit:
8+
clients:
9+
cluster1.client.mirror:
10+
- rbd/compare_mirror_images.sh
11+
env:
12+
RBD_DEVICE_TYPE: 'krbd'
13+
timeout: 3h
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# QA job fragment (nbd variant): adds the `rbd-nbd` package (presumably needed
# for `rbd device map -t nbd` -- confirm) and the `pv` system package to the
# ceph install overrides, then runs rbd/compare_mirror_images.sh as a workunit
# on cluster1.client.mirror with RBD_DEVICE_TYPE='nbd' and `timeout: 3h`.
overrides:
2+
install:
3+
ceph:
4+
extra_packages:
5+
- rbd-nbd
6+
extra_system_packages:
7+
- pv
8+
tasks:
9+
- workunit:
10+
clients:
11+
cluster1.client.mirror:
12+
- rbd/compare_mirror_images.sh
13+
env:
14+
RBD_DEVICE_TYPE: 'nbd'
15+
timeout: 3h
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
#!/bin/bash
#
# Snapshot-based mirroring test for a single image whose primary role
# alternates between two clusters. Each round writes to the primary under
# periodic mirror snapshots, demotes it, promotes the peer and checks that
# both sides have the same md5 checksum.
#
# Required environment: RBD_DEVICE_TYPE ('nbd' or 'krbd').

# Abort on the first failing command and trace every command.
set -ex

# IMAGE and MOUNT name the test image and the directory it is mounted on;
# RBD_MIRROR_MODE is passed to create_image_and_enable_mirror. The remaining
# settings are not referenced by this script directly -- presumably they are
# consumed by rbd_mirror_helpers.sh, which is why they are set before it is
# sourced (confirm against the helpers).
IMAGE=image-alternate-primary
MIRROR_IMAGE_MODE=snapshot
MIRROR_POOL_MODE=image
MOUNT=test-alternate-primary
RBD_IMAGE_FEATURES='layering,exclusive-lock,object-map,fast-diff'
RBD_MIRROR_INSTANCES=1
RBD_MIRROR_MODE=snapshot
RBD_MIRROR_USE_EXISTING_CLUSTER=1

# Quoted so that a script directory containing whitespace still resolves.
. "$(dirname "$0")/rbd_mirror_helpers.sh"
15+
16+
# Create a series of mirror snapshots of an image.
#
# Calls mirror_image_snapshot (not defined in this script; expected to come
# from the sourced rbd_mirror_helpers.sh) 30 times, sleeping 3 seconds after
# each call.
#
# Arguments:
#   $1 - cluster name
#   $2 - pool name
#   $3 - image name
take_mirror_snapshots() {
    local cluster=$1
    local pool=$2
    local image=$3
    # Keep the loop counter local so a foreground call cannot clobber a
    # caller's own "i".
    local i

    for i in {1..30}; do
        mirror_image_snapshot "$cluster" "$pool" "$image"
        sleep 3
    done
}
26+
27+
# Run a rate-limited untar workload inside a mounted filesystem.
#
# Copies linux-5.4.tar.gz from the current directory into the mount point and
# extracts it there through `pv -L 256K`, under a 5 minute `timeout`.
#
# Arguments:
#   $1 - mount point directory
# Returns:
#   0 if the workload was still running when `timeout` stopped it (status
#   124); 1 (after printing a message) for any other status, including a
#   successful early finish.
slow_untar_workload() {
    local mountpt=$1
    local ret=0

    cp linux-5.4.tar.gz "$mountpt"
    # run workload that updates the data and metadata of multiple files on disk.
    # rate limit the workload such that the mirror snapshots can be taken as the
    # contents of the image are progressively changed by the workload.
    #
    # The mount point is handed to the inner shell as a positional parameter
    # rather than being spliced into the command string, so it is neither
    # word-split nor re-parsed as shell code.
    timeout 5m bash -c \
        'zcat "$1/linux-5.4.tar.gz" | pv -L 256K | tar xf - -C "$1"' \
        bash "$mountpt" || ret=$?
    if ((ret != 124)); then
        echo "Workload completed prematurely"
        return 1
    fi
}
42+
43+
# Main flow. setup, start_mirrors, create_image_and_enable_mirror,
# demote_image, promote_image and wait_for_status_in_pool_dir are not defined
# in this script; they are expected to come from rbd_mirror_helpers.sh.
setup

start_mirrors "${CLUSTER1}"
start_mirrors "${CLUSTER2}"

# initial setup
create_image_and_enable_mirror "${CLUSTER1}" "${POOL}" "${IMAGE}" \
    "${RBD_MIRROR_MODE}" 10G

# Resolve the `rbd device map` options once; they are reused every time the
# image is mapped below.
case "${RBD_DEVICE_TYPE}" in
    nbd)
        RBD_MAP_OPTS=(-t nbd -o try-netlink)
        ;;
    krbd)
        RBD_MAP_OPTS=(-t krbd)
        ;;
    *)
        echo "Unknown RBD_DEVICE_TYPE: ${RBD_DEVICE_TYPE}"
        exit 1
        ;;
esac

DEV=$(sudo rbd --cluster "${CLUSTER1}" device map "${RBD_MAP_OPTS[@]}" \
    "${POOL}/${IMAGE}")
sudo mkfs.ext4 "${DEV}"
# -p keeps a leftover directory from an earlier run from aborting the test.
mkdir -p "${MOUNT}"

wget https://download.ceph.com/qa/linux-5.4.tar.gz

for i in {1..25}; do
    # create mirror snapshots every few seconds under I/O
    sudo mount "${DEV}" "${MOUNT}"
    sudo chown "$(whoami)" "${MOUNT}"
    # ${MOUNT:?} aborts instead of expanding to "/*" if MOUNT is ever empty.
    rm -rf "${MOUNT:?}"/*
    take_mirror_snapshots "${CLUSTER1}" "${POOL}" "${IMAGE}" &
    SNAP_PID=$!
    slow_untar_workload "${MOUNT}"
    wait "${SNAP_PID}"
    sudo umount "${MOUNT}"

    # calculate hash before demotion of primary image
    DEMOTE_MD5=$(sudo md5sum "${DEV}" | awk '{print $1}')
    sudo rbd --cluster "${CLUSTER1}" device unmap -t "${RBD_DEVICE_TYPE}" \
        "${DEV}"

    demote_image "${CLUSTER1}" "${POOL}" "${IMAGE}"
    wait_for_status_in_pool_dir "${CLUSTER1}" "${POOL}" "${IMAGE}" 'up+unknown'
    wait_for_status_in_pool_dir "${CLUSTER2}" "${POOL}" "${IMAGE}" 'up+unknown'
    promote_image "${CLUSTER2}" "${POOL}" "${IMAGE}"

    # calculate hash after promotion of secondary image
    DEV=$(sudo rbd --cluster "${CLUSTER2}" device map "${RBD_MAP_OPTS[@]}" \
        "${POOL}/${IMAGE}")
    PROMOTE_MD5=$(sudo md5sum "${DEV}" | awk '{print $1}')

    # The status of md5sum is hidden by the pipeline into awk; without this
    # check two failed checksums (both empty) would compare as equal.
    if [[ -z "${DEMOTE_MD5}" || -z "${PROMOTE_MD5}" ]]; then
        echo "Failed to compute checksum at iteration ${i}"
        exit 1
    fi

    if [[ "${DEMOTE_MD5}" != "${PROMOTE_MD5}" ]]; then
        echo "Mismatch at iteration ${i}: ${DEMOTE_MD5} != ${PROMOTE_MD5}"
        exit 1
    fi

    # Swap the roles: the cluster that was just promoted is the one written
    # to in the next iteration, and DEV already refers to its mapping.
    TEMP=${CLUSTER1}
    CLUSTER1=${CLUSTER2}
    CLUSTER2=${TEMP}
done

# The device mapped in the last iteration is still attached; after the swap
# above it belongs to CLUSTER1. Release it before finishing.
sudo rbd --cluster "${CLUSTER1}" device unmap -t "${RBD_DEVICE_TYPE}" "${DEV}"

echo OK
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
#!/bin/bash
#
# Snapshot-based mirroring test for several images at once. Each round
# creates ten mirrored images, writes to all of them under periodic mirror
# snapshots, then demotes each primary, promotes its peer and checks that
# both sides have the same md5 checksum before removing the images.
#
# Required environment: RBD_DEVICE_TYPE ('nbd' or 'krbd').

# Abort on the first failing command and trace every command.
set -ex

# IMG_PREFIX and MNTPT_PREFIX are combined with an index to name each image
# and its mount directory; RBD_MIRROR_MODE is passed to
# create_image_and_enable_mirror. The remaining settings are not referenced
# by this script directly -- presumably they are consumed by
# rbd_mirror_helpers.sh, which is why they are set before it is sourced
# (confirm against the helpers).
IMG_PREFIX=image-primary
MIRROR_IMAGE_MODE=snapshot
MIRROR_POOL_MODE=image
MNTPT_PREFIX=test-primary
RBD_IMAGE_FEATURES='layering,exclusive-lock,object-map,fast-diff'
RBD_MIRROR_INSTANCES=1
RBD_MIRROR_MODE=snapshot
RBD_MIRROR_USE_EXISTING_CLUSTER=1

# Quoted so that a script directory containing whitespace still resolves.
. "$(dirname "$0")/rbd_mirror_helpers.sh"
15+
16+
# Create a series of mirror snapshots of an image.
#
# Calls mirror_image_snapshot (not defined in this script; expected to come
# from the sourced rbd_mirror_helpers.sh) 30 times, sleeping 3 seconds after
# each call.
#
# Arguments:
#   $1 - cluster name
#   $2 - pool name
#   $3 - image name
take_mirror_snapshots() {
    local cluster=$1
    local pool=$2
    local image=$3
    # Keep the loop counter local so a foreground call cannot clobber a
    # caller's own "i".
    local i

    for i in {1..30}; do
        mirror_image_snapshot "$cluster" "$pool" "$image"
        sleep 3
    done
}
26+
27+
# Run a rate-limited untar workload inside a mounted filesystem.
#
# Copies linux-5.4.tar.gz from the current directory into the mount point and
# extracts it there through `pv -L 256K`, under a 5 minute `timeout`.
#
# Arguments:
#   $1 - mount point directory
# Returns:
#   0 if the workload was still running when `timeout` stopped it (status
#   124); 1 (after printing a message) for any other status, including a
#   successful early finish.
slow_untar_workload() {
    local mountpt=$1
    local ret=0

    cp linux-5.4.tar.gz "$mountpt"
    # run workload that updates the data and metadata of multiple files on disk.
    # rate limit the workload such that the mirror snapshots can be taken as the
    # contents of the image are progressively changed by the workload.
    #
    # The mount point is handed to the inner shell as a positional parameter
    # rather than being spliced into the command string, so it is neither
    # word-split nor re-parsed as shell code.
    timeout 5m bash -c \
        'zcat "$1/linux-5.4.tar.gz" | pv -L 256K | tar xf - -C "$1"' \
        bash "$mountpt" || ret=$?
    if ((ret != 124)); then
        echo "Workload completed prematurely"
        return 1
    fi
}
42+
43+
# Poll `rbd ls` until an image no longer appears in a pool.
#
# Arguments:
#   $1 - cluster name
#   $2 - pool name
#   $3 - image name
# Returns:
#   0 as soon as a successful listing does not contain the image; 1 (after
#   printing a message) if it is still listed once all retries are used up.
wait_for_image_removal() {
    local cluster=$1
    local pool=$2
    local image=$3
    local delay images

    # Sleep before each check (starting with no delay) so that the final
    # back-off interval is followed by one last look at the pool.
    for delay in 0 1 2 4 8 8 8 8 8 8 8 8 16 16; do
        sleep "$delay"
        # A failed listing is not evidence of removal, so only a successful
        # `rbd ls` may end the wait. -F -x compares whole lines literally:
        # "image-primary1" must not match "image-primary1-foo" or
        # "image-primary10".
        if images=$(rbd --cluster "$cluster" ls "$pool") &&
                ! grep -Fxq -- "$image" <<< "$images"; then
            return 0
        fi
    done

    echo "image ${pool}/${image} not removed from cluster ${cluster}"
    return 1
}
58+
59+
# Check that an image has identical content before demotion on CLUSTER1 and
# after promotion on CLUSTER2.
#
# Unmounts the image's filesystem, checksums the mapped device, unmaps it,
# demotes the image on CLUSTER1, promotes it on CLUSTER2, maps it there,
# checksums it again and unmaps it. demote_image, promote_image and
# wait_for_status_in_pool_dir are not defined in this script; they are
# expected to come from rbd_mirror_helpers.sh.
#
# Globals (read): DEVS, IMG_PREFIX, MNTPT_PREFIX, POOL, CLUSTER1, CLUSTER2,
#                 RBD_DEVICE_TYPE
# Arguments:
#   $1 - 1-based image index; selects DEVS[$1-1], ${IMG_PREFIX}$1 and
#        ${MNTPT_PREFIX}$1
# Returns:
#   0 if both checksums were computed and are equal, 1 otherwise.
compare_demoted_promoted_image() {
    local dev=${DEVS[$1-1]}
    local img=${IMG_PREFIX}$1
    local mntpt=${MNTPT_PREFIX}$1
    local demote_md5 promote_md5
    local -a map_opts

    # Reject an unsupported device type before touching anything; otherwise
    # the second checksum would be taken from the stale, unmapped device path.
    case "${RBD_DEVICE_TYPE}" in
        nbd)
            map_opts=(-t nbd -o try-netlink)
            ;;
        krbd)
            map_opts=(-t krbd)
            ;;
        *)
            echo "Unknown RBD_DEVICE_TYPE: ${RBD_DEVICE_TYPE}"
            return 1
            ;;
    esac

    sudo umount "${mntpt}"

    # calculate hash before demotion of primary image
    demote_md5=$(sudo md5sum "${dev}" | awk '{print $1}')
    sudo rbd --cluster "${CLUSTER1}" device unmap -t "${RBD_DEVICE_TYPE}" \
        "${POOL}/${img}"

    demote_image "${CLUSTER1}" "${POOL}" "${img}"
    wait_for_status_in_pool_dir "${CLUSTER1}" "${POOL}" "${img}" 'up+unknown'
    wait_for_status_in_pool_dir "${CLUSTER2}" "${POOL}" "${img}" 'up+unknown'
    promote_image "${CLUSTER2}" "${POOL}" "${img}"

    # calculate hash after promotion of secondary image
    dev=$(sudo rbd --cluster "${CLUSTER2}" device map "${map_opts[@]}" \
        "${POOL}/${img}")
    promote_md5=$(sudo md5sum "${dev}" | awk '{print $1}')
    sudo rbd --cluster "${CLUSTER2}" device unmap -t "${RBD_DEVICE_TYPE}" \
        "${dev}"

    # The status of md5sum is hidden by the pipeline into awk; without this
    # check two failed checksums (both empty) would compare as equal.
    if [[ -z "${demote_md5}" || -z "${promote_md5}" ]]; then
        echo "Failed to compute checksum for image ${POOL}/${img}"
        return 1
    fi

    if [[ "${demote_md5}" != "${promote_md5}" ]]; then
        echo "Mismatch for image ${POOL}/${img}: ${demote_md5} != ${promote_md5}"
        return 1
    fi
}
92+
93+
# Main flow. setup, start_mirrors, create_image_and_enable_mirror,
# wait_for_replaying_status_in_pool_dir and remove_image are not defined in
# this script; they are expected to come from rbd_mirror_helpers.sh.
setup

start_mirrors "${CLUSTER1}"
start_mirrors "${CLUSTER2}"

wget https://download.ceph.com/qa/linux-5.4.tar.gz

# The device type does not change between images, so resolve the
# `rbd device map` options once instead of once per image.
case "${RBD_DEVICE_TYPE}" in
    nbd)
        RBD_MAP_OPTS=(-t nbd -o try-netlink)
        ;;
    krbd)
        RBD_MAP_OPTS=(-t krbd)
        ;;
    *)
        echo "Unknown RBD_DEVICE_TYPE: ${RBD_DEVICE_TYPE}"
        exit 1
        ;;
esac

for i in {1..10}; do
    DEVS=()
    SNAP_PIDS=()
    COMPARE_PIDS=()
    WORKLOAD_PIDS=()
    RET=0

    # Create, map, format and mount ten images, starting a snapshot job and
    # a workload job in the background for each of them.
    for j in {1..10}; do
        IMG=${IMG_PREFIX}${j}
        MNTPT=${MNTPT_PREFIX}${j}
        create_image_and_enable_mirror "${CLUSTER1}" "${POOL}" "${IMG}" \
            "${RBD_MIRROR_MODE}" 10G
        DEV=$(sudo rbd --cluster "${CLUSTER1}" device map \
            "${RBD_MAP_OPTS[@]}" "${POOL}/${IMG}")
        DEVS+=("${DEV}")
        sudo mkfs.ext4 "${DEV}"
        # -p keeps a leftover directory from an earlier run from aborting
        # the test.
        mkdir -p "${MNTPT}"
        sudo mount "${DEV}" "${MNTPT}"
        sudo chown "$(whoami)" "${MNTPT}"
        # create mirror snapshots under I/O every few seconds
        take_mirror_snapshots "${CLUSTER1}" "${POOL}" "${IMG}" &
        SNAP_PIDS+=($!)
        slow_untar_workload "${MNTPT}" &
        WORKLOAD_PIDS+=($!)
    done

    # `set -e` does not react to failing background jobs, so every job is
    # waited for explicitly and its status collected in RET.
    for pid in "${SNAP_PIDS[@]}"; do
        wait "${pid}" || RET=$?
    done
    if ((RET != 0)); then
        echo "take_mirror_snapshots failed"
        exit 1
    fi
    for pid in "${WORKLOAD_PIDS[@]}"; do
        wait "${pid}" || RET=$?
    done
    if ((RET != 0)); then
        echo "slow_untar_workload failed"
        exit 1
    fi

    # Demote/promote and compare all ten images concurrently.
    for j in {1..10}; do
        compare_demoted_promoted_image "${j}" &
        COMPARE_PIDS+=($!)
    done
    for pid in "${COMPARE_PIDS[@]}"; do
        wait "${pid}" || RET=$?
    done
    if ((RET != 0)); then
        echo "compare_demoted_promoted_image failed"
        exit 1
    fi

    for j in {1..10}; do
        IMG=${IMG_PREFIX}${j}
        # Allow for removal of non-primary image by checking that mirroring
        # image status is "up+replaying"
        wait_for_replaying_status_in_pool_dir "${CLUSTER1}" "${POOL}" "${IMG}"
        remove_image "${CLUSTER2}" "${POOL}" "${IMG}"
        wait_for_image_removal "${CLUSTER1}" "${POOL}" "${IMG}"
        # ${MNTPT_PREFIX:?} aborts rather than deleting a directory named
        # just "${j}" if the prefix is ever empty.
        rm -rf "${MNTPT_PREFIX:?}${j}"
    done
done

echo OK

qa/workunits/rbd/rbd_mirror_helpers.sh

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -823,23 +823,23 @@ test_status_in_pool_dir()
823823
local description_pattern="$5"
824824
local service_pattern="$6"
825825

826-
local status_log=${TEMPDIR}/$(mkfname ${cluster}-${pool}-${image}.mirror_status)
827-
CEPH_ARGS='' rbd --cluster ${cluster} mirror image status ${pool}/${image} |
828-
tee ${status_log} >&2
829-
grep "^ state: .*${state_pattern}" ${status_log} || return 1
830-
grep "^ description: .*${description_pattern}" ${status_log} || return 1
826+
local status
827+
status=$(CEPH_ARGS='' rbd --cluster ${cluster} mirror image status \
828+
${pool}/${image})
829+
grep "^ state: .*${state_pattern}" <<< "$status" || return 1
830+
grep "^ description: .*${description_pattern}" <<< "$status" || return 1
831831

832832
if [ -n "${service_pattern}" ]; then
833-
grep "service: *${service_pattern}" ${status_log} || return 1
833+
grep "service: *${service_pattern}" <<< "$status" || return 1
834834
elif echo ${state_pattern} | grep '^up+'; then
835-
grep "service: *${MIRROR_USER_ID_PREFIX}.* on " ${status_log} || return 1
835+
grep "service: *${MIRROR_USER_ID_PREFIX}.* on " <<< "$status" || return 1
836836
else
837-
grep "service: " ${status_log} && return 1
837+
grep "service: " <<< "$status" && return 1
838838
fi
839839

840840
# recheck using `mirror pool status` command to stress test it.
841-
842-
local last_update="$(sed -nEe 's/^ last_update: *(.*) *$/\1/p' ${status_log})"
841+
local last_update
842+
last_update="$(sed -nEe 's/^ last_update: *(.*) *$/\1/p' <<< "$status")"
843843
test_mirror_pool_status_verbose \
844844
${cluster} ${pool} ${image} "${state_pattern}" "${last_update}" &&
845845
return 0
@@ -856,16 +856,15 @@ test_mirror_pool_status_verbose()
856856
local state_pattern="$4"
857857
local prev_last_update="$5"
858858

859-
local status_log=${TEMPDIR}/$(mkfname ${cluster}-${pool}.mirror_status)
860-
861-
rbd --cluster ${cluster} mirror pool status ${pool} --verbose --format xml \
862-
> ${status_log}
859+
local status
860+
status=$(CEPH_ARGS='' rbd --cluster ${cluster} mirror pool status ${pool} \
861+
--verbose --format xml)
863862

864863
local last_update state
865864
last_update=$($XMLSTARLET sel -t -v \
866-
"//images/image[name='${image}']/last_update" < ${status_log})
865+
"//images/image[name='${image}']/last_update" <<< "$status")
867866
state=$($XMLSTARLET sel -t -v \
868-
"//images/image[name='${image}']/state" < ${status_log})
867+
"//images/image[name='${image}']/state" <<< "$status")
869868

870869
echo "${state}" | grep "${state_pattern}" ||
871870
test "${last_update}" '>' "${prev_last_update}"

0 commit comments

Comments
 (0)