Skip to content

Commit b7aae5c

Browse files
ajarr, idryomov, and chrisphoffman
committed
qa: Add tests to validate syncing of images using rbd-mirror
Introduce functional tests to validate that images under workloads are correctly mirrored between two clusters using snapshot-based mirroring.

Run a workload on a primary image using a krbd or nbd client. Take mirror snapshots of the image while the workload is running. Unmount the mapped image and calculate its MD5 checksum before demoting it. After demotion, wait for the mirror status of the image to be 'up+unknown' in both clusters. This makes sure that the non-primary image in the other cluster is ready to be promoted. Then promote the non-primary image in the other cluster. Map the promoted image and calculate its MD5 checksum. Verify that the checksums of the demoted and promoted images in the two clusters are the same.

The above test is run as part of two different workunits:
- a workunit that validates the syncing of multiple mirrored images with workloads running on them
- another workunit that validates the syncing of a single mirrored image with a workload running on it, where the image is set as primary alternately between the two clusters, as happens during failover and failback scenarios.

Fixes: https://tracker.ceph.com/issues/61617
Signed-off-by: Ramana Raja <[email protected]>
Co-authored-by: Ilya Dryomov <[email protected]>
Co-authored-by: Christopher Hoffman <[email protected]>
1 parent ea3a567 commit b7aae5c

File tree

6 files changed

+332
-0
lines changed

6 files changed

+332
-0
lines changed
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Runs rbd/compare_mirror_image_alternate_primary.sh with krbd-mapped images.
overrides:
  install:
    ceph:
      extra_system_packages:
        # pv is used by the workunit to rate limit its untar workload
        - pv
tasks:
- workunit:
    clients:
      cluster1.client.mirror:
        - rbd/compare_mirror_image_alternate_primary.sh
    env:
      # selects the `rbd device map -t krbd` code path in the script
      RBD_DEVICE_TYPE: 'krbd'
    timeout: 3h
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Runs rbd/compare_mirror_image_alternate_primary.sh with nbd-mapped images.
overrides:
  install:
    ceph:
      extra_packages:
        # NOTE(review): presumably required for `rbd device map -t nbd` -- confirm
        - rbd-nbd
      extra_system_packages:
        # pv is used by the workunit to rate limit its untar workload
        - pv
tasks:
- workunit:
    clients:
      cluster1.client.mirror:
        - rbd/compare_mirror_image_alternate_primary.sh
    env:
      # selects the `rbd device map -t nbd` code path in the script
      RBD_DEVICE_TYPE: 'nbd'
    timeout: 3h
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Runs rbd/compare_mirror_images.sh with krbd-mapped images.
overrides:
  install:
    ceph:
      extra_system_packages:
        # pv is used by the workunit to rate limit its untar workload
        - pv
tasks:
- workunit:
    clients:
      cluster1.client.mirror:
        - rbd/compare_mirror_images.sh
    env:
      # selects the `rbd device map -t krbd` code path in the script
      RBD_DEVICE_TYPE: 'krbd'
    timeout: 3h
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Runs rbd/compare_mirror_images.sh with nbd-mapped images.
overrides:
  install:
    ceph:
      extra_packages:
        # NOTE(review): presumably required for `rbd device map -t nbd` -- confirm
        - rbd-nbd
      extra_system_packages:
        # pv is used by the workunit to rate limit its untar workload
        - pv
tasks:
- workunit:
    clients:
      cluster1.client.mirror:
        - rbd/compare_mirror_images.sh
    env:
      # selects the `rbd device map -t nbd` code path in the script
      RBD_DEVICE_TYPE: 'nbd'
    timeout: 3h
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
#!/bin/bash
#
# Workunit: run a workload on a snapshot-mirrored RBD image and verify, via
# MD5 checksums, that the image content matches after each demote/promote
# cycle while the primary role alternates between two clusters.
#
# Environment:
#   RBD_DEVICE_TYPE - 'krbd' or 'nbd'; any other value aborts the script.

# abort on the first failing command and trace every command
set -ex

# Settings defined before the helper library is sourced.
# NOTE(review): most of these are not read in this script and are presumably
# consumed by rbd_mirror_helpers.sh -- confirm against that file.
IMAGE=image-alternate-primary
MIRROR_IMAGE_MODE=snapshot
MIRROR_POOL_MODE=image
MOUNT=test-alternate-primary
RBD_IMAGE_FEATURES='layering,exclusive-lock,object-map,fast-diff'
RBD_MIRROR_INSTANCES=1
RBD_MIRROR_MODE=snapshot
RBD_MIRROR_USE_EXISTING_CLUSTER=1

# Quoted so that a script path containing whitespace still resolves.
. "$(dirname "$0")/rbd_mirror_helpers.sh"
15+
16+
#######################################
# Take 30 mirror snapshots of an image, pausing 3 seconds after each one.
# Arguments:
#   $1 - cluster name
#   $2 - pool name
#   $3 - image name
# Returns:
#   status of the last command executed
#######################################
take_mirror_snapshots() {
    local cluster=$1
    local pool=$2
    local image=$3
    # keep the loop counter local so a caller's own "i" is never clobbered
    local i

    for i in {1..30}; do
        mirror_image_snapshot "$cluster" "$pool" "$image"
        sleep 3
    done
}
26+
27+
#######################################
# Copy linux-5.4.tar.gz (from the current directory) into a mounted
# filesystem and extract it there at a throttled rate for 5 minutes.
# Arguments:
#   $1 - mount point to write into
# Outputs:
#   a diagnostic on stdout when the workload did not run for the full time
# Returns:
#   0 if the workload was cut off by the 5 minute timeout (the expected
#   outcome), 1 if it ended for any other reason
#######################################
slow_untar_workload() {
    local mountpt=$1
    local ret=0

    cp linux-5.4.tar.gz "$mountpt"
    # run workload that updates the data and metadata of multiple files on disk.
    # rate limit the workload such that the mirror snapshots can be taken as the
    # contents of the image are progressively changed by the workload.
    # The mount point is handed to the inner shell as "$1" instead of being
    # pasted into the command string, so unusual paths cannot alter the command.
    timeout 5m bash -c \
        'zcat "$1/linux-5.4.tar.gz" | pv -L 256K | tar xf - -C "$1"' \
        _ "$mountpt" || ret=$?
    # 124 is the status timeout(1) reports when it had to stop the command
    if ((ret != 124)); then
        echo "Workload completed prematurely"
        return 1
    fi
}
42+
43+
# Main flow. setup, start_mirrors, create_image_and_enable_mirror,
# demote_image, promote_image and wait_for_status_in_pool_dir are not defined
# in this script (they are expected from the sourced helper library).
setup

start_mirrors "${CLUSTER1}"
start_mirrors "${CLUSTER2}"

# initial setup
create_image_and_enable_mirror "${CLUSTER1}" "${POOL}" "${IMAGE}" \
    "${RBD_MIRROR_MODE}" 10G

if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then
    DEV=$(sudo rbd --cluster "${CLUSTER1}" device map -t nbd \
        -o try-netlink "${POOL}/${IMAGE}")
elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then
    DEV=$(sudo rbd --cluster "${CLUSTER1}" device map -t krbd \
        "${POOL}/${IMAGE}")
else
    echo "Unknown RBD_DEVICE_TYPE: ${RBD_DEVICE_TYPE}"
    exit 1
fi
sudo mkfs.ext4 "${DEV}"
mkdir "${MOUNT}"

wget https://download.ceph.com/qa/linux-5.4.tar.gz

for i in {1..25}; do
    # create mirror snapshots every few seconds under I/O
    sudo mount "${DEV}" "${MOUNT}"
    sudo chown "$(whoami)" "${MOUNT}"
    # ":?" aborts instead of expanding to "/*" should MOUNT ever be empty
    rm -rf "${MOUNT:?}"/*
    take_mirror_snapshots "${CLUSTER1}" "${POOL}" "${IMAGE}" &
    SNAP_PID=$!
    slow_untar_workload "${MOUNT}"
    # propagates a failure of the background snapshot loop (set -e)
    wait "${SNAP_PID}"
    sudo umount "${MOUNT}"

    # calculate hash before demotion of primary image
    DEMOTE_MD5=$(sudo md5sum "${DEV}" | awk '{print $1}')
    sudo rbd --cluster "${CLUSTER1}" device unmap -t "${RBD_DEVICE_TYPE}" \
        "${DEV}"

    demote_image "${CLUSTER1}" "${POOL}" "${IMAGE}"
    wait_for_status_in_pool_dir "${CLUSTER1}" "${POOL}" "${IMAGE}" 'up+unknown'
    wait_for_status_in_pool_dir "${CLUSTER2}" "${POOL}" "${IMAGE}" 'up+unknown'
    promote_image "${CLUSTER2}" "${POOL}" "${IMAGE}"

    # calculate hash after promotion of secondary image
    # (RBD_DEVICE_TYPE was already validated before the loop)
    if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then
        DEV=$(sudo rbd --cluster "${CLUSTER2}" device map -t nbd \
            -o try-netlink "${POOL}/${IMAGE}")
    elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then
        DEV=$(sudo rbd --cluster "${CLUSTER2}" device map -t krbd \
            "${POOL}/${IMAGE}")
    fi
    PROMOTE_MD5=$(sudo md5sum "${DEV}" | awk '{print $1}')

    if [[ "${DEMOTE_MD5}" != "${PROMOTE_MD5}" ]]; then
        echo "Mismatch at iteration ${i}: ${DEMOTE_MD5} != ${PROMOTE_MD5}"
        exit 1
    fi

    # swap roles: the cluster that was just promoted is the primary for the
    # next iteration, and DEV stays mapped on it for the next mount
    TEMP=${CLUSTER1}
    CLUSTER1=${CLUSTER2}
    CLUSTER2=${TEMP}
done

# The device mapped in the final iteration is still attached; after the last
# swap it belongs to ${CLUSTER1}. Unmap it so no block device is left behind.
sudo rbd --cluster "${CLUSTER1}" device unmap -t "${RBD_DEVICE_TYPE}" "${DEV}"

echo OK
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
#!/bin/bash
#
# Workunit: run workloads on several snapshot-mirrored RBD images at once and
# verify, via MD5 checksums, that each image has the same content after it is
# demoted on one cluster and promoted on the other.
#
# Environment:
#   RBD_DEVICE_TYPE - 'krbd' or 'nbd'; any other value aborts the script.

# abort on the first failing command and trace every command
set -ex

# Settings defined before the helper library is sourced.
# NOTE(review): most of these are not read in this script and are presumably
# consumed by rbd_mirror_helpers.sh -- confirm against that file.
IMG_PREFIX=image-primary
MIRROR_IMAGE_MODE=snapshot
MIRROR_POOL_MODE=image
MNTPT_PREFIX=test-primary
RBD_IMAGE_FEATURES='layering,exclusive-lock,object-map,fast-diff'
RBD_MIRROR_INSTANCES=1
RBD_MIRROR_MODE=snapshot
RBD_MIRROR_USE_EXISTING_CLUSTER=1

# Quoted so that a script path containing whitespace still resolves.
. "$(dirname "$0")/rbd_mirror_helpers.sh"
15+
16+
#######################################
# Take 30 mirror snapshots of an image, pausing 3 seconds after each one.
# Arguments:
#   $1 - cluster name
#   $2 - pool name
#   $3 - image name
# Returns:
#   status of the last command executed
#######################################
take_mirror_snapshots() {
    local cluster=$1
    local pool=$2
    local image=$3
    # keep the loop counter local so a caller's own "i" is never clobbered
    local i

    for i in {1..30}; do
        mirror_image_snapshot "$cluster" "$pool" "$image"
        sleep 3
    done
}
26+
27+
#######################################
# Copy linux-5.4.tar.gz (from the current directory) into a mounted
# filesystem and extract it there at a throttled rate for 5 minutes.
# Arguments:
#   $1 - mount point to write into
# Outputs:
#   a diagnostic on stdout when the workload did not run for the full time
# Returns:
#   0 if the workload was cut off by the 5 minute timeout (the expected
#   outcome), 1 if it ended for any other reason
#######################################
slow_untar_workload() {
    local mountpt=$1
    local ret=0

    cp linux-5.4.tar.gz "$mountpt"
    # run workload that updates the data and metadata of multiple files on disk.
    # rate limit the workload such that the mirror snapshots can be taken as the
    # contents of the image are progressively changed by the workload.
    # The mount point is handed to the inner shell as "$1" instead of being
    # pasted into the command string, so unusual paths cannot alter the command.
    timeout 5m bash -c \
        'zcat "$1/linux-5.4.tar.gz" | pv -L 256K | tar xf - -C "$1"' \
        _ "$mountpt" || ret=$?
    # 124 is the status timeout(1) reports when it had to stop the command
    if ((ret != 124)); then
        echo "Workload completed prematurely"
        return 1
    fi
}
42+
43+
#######################################
# Poll `rbd ls` until an image no longer appears in a pool.
# Retries with increasing sleeps (1 2 4 8 ... 16 seconds) before giving up.
# Arguments:
#   $1 - cluster name
#   $2 - pool name
#   $3 - image name
# Outputs:
#   a diagnostic on stdout if the image is still listed after all retries
# Returns:
#   0 once the image is absent from the listing, 1 if it never disappears
#######################################
wait_for_image_removal() {
    local cluster=$1
    local pool=$2
    local image=$3
    local s

    for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do
        # -F -x: compare the whole line literally, so an image whose name
        # merely contains "$image" (e.g. "old-$image") is not mistaken for it
        if ! rbd --cluster "$cluster" ls "$pool" | grep -Fxq -- "$image"; then
            return 0
        fi
        sleep "$s"
    done

    echo "image ${pool}/${image} not removed from cluster ${cluster}"
    return 1
}
58+
59+
#######################################
# Fail over one image from CLUSTER1 to CLUSTER2 and check that its content
# is unchanged: checksum the device before demotion, demote, wait for
# 'up+unknown' on both clusters, promote on the other cluster, map the
# promoted image and checksum it again.
# Globals:
#   DEVS, IMG_PREFIX, MNTPT_PREFIX, POOL, CLUSTER1, CLUSTER2,
#   RBD_DEVICE_TYPE (all read)
# Arguments:
#   $1 - 1-based image index; selects DEVS[$1-1], ${IMG_PREFIX}$1 and
#        ${MNTPT_PREFIX}$1
# Outputs:
#   a diagnostic on stdout on checksum mismatch or unknown device type
# Returns:
#   0 if both checksums match, 1 otherwise
#######################################
compare_demoted_promoted_image() {
    local dev=${DEVS[$1-1]}
    local img=${IMG_PREFIX}$1
    local mntpt=${MNTPT_PREFIX}$1
    local demote_md5 promote_md5

    sudo umount "${mntpt}"

    # calculate hash before demotion of primary image
    demote_md5=$(sudo md5sum "${dev}" | awk '{print $1}')
    sudo rbd --cluster "${CLUSTER1}" device unmap -t "${RBD_DEVICE_TYPE}" \
        "${POOL}/${img}"

    demote_image "${CLUSTER1}" "${POOL}" "${img}"
    wait_for_status_in_pool_dir "${CLUSTER1}" "${POOL}" "${img}" 'up+unknown'
    wait_for_status_in_pool_dir "${CLUSTER2}" "${POOL}" "${img}" 'up+unknown'
    promote_image "${CLUSTER2}" "${POOL}" "${img}"

    # calculate hash after promotion of secondary image
    if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then
        dev=$(sudo rbd --cluster "${CLUSTER2}" device map -t nbd \
            -o try-netlink "${POOL}/${img}")
    elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then
        dev=$(sudo rbd --cluster "${CLUSTER2}" device map -t krbd \
            "${POOL}/${img}")
    else
        # without a fresh mapping "dev" still names the device unmapped above;
        # checksumming it would compare against stale or missing data
        echo "Unknown RBD_DEVICE_TYPE: ${RBD_DEVICE_TYPE}"
        return 1
    fi
    promote_md5=$(sudo md5sum "${dev}" | awk '{print $1}')
    sudo rbd --cluster "${CLUSTER2}" device unmap -t "${RBD_DEVICE_TYPE}" \
        "${dev}"

    if [[ "${demote_md5}" != "${promote_md5}" ]]; then
        echo "Mismatch for image ${POOL}/${img}: ${demote_md5} != ${promote_md5}"
        return 1
    fi
}
92+
93+
# Main flow. setup, start_mirrors, create_image_and_enable_mirror,
# wait_for_replaying_status_in_pool_dir and remove_image are not defined in
# this script (they are expected from the sourced helper library).
setup

start_mirrors "${CLUSTER1}"
start_mirrors "${CLUSTER2}"

wget https://download.ceph.com/qa/linux-5.4.tar.gz

for i in {1..10}; do
    DEVS=()
    SNAP_PIDS=()
    COMPARE_PIDS=()
    WORKLOAD_PIDS=()
    RET=0
    # create, map, format and mount ten images, starting a snapshot loop and
    # a workload in the background for each of them
    for j in {1..10}; do
        IMG=${IMG_PREFIX}${j}
        MNTPT=${MNTPT_PREFIX}${j}
        create_image_and_enable_mirror "${CLUSTER1}" "${POOL}" "${IMG}" \
            "${RBD_MIRROR_MODE}" 10G
        if [[ $RBD_DEVICE_TYPE == "nbd" ]]; then
            DEV=$(sudo rbd --cluster "${CLUSTER1}" device map -t nbd \
                -o try-netlink "${POOL}/${IMG}")
        elif [[ $RBD_DEVICE_TYPE == "krbd" ]]; then
            DEV=$(sudo rbd --cluster "${CLUSTER1}" device map -t krbd \
                "${POOL}/${IMG}")
        else
            echo "Unknown RBD_DEVICE_TYPE: ${RBD_DEVICE_TYPE}"
            exit 1
        fi
        DEVS+=("${DEV}")
        sudo mkfs.ext4 "${DEV}"
        mkdir "${MNTPT}"
        sudo mount "${DEV}" "${MNTPT}"
        sudo chown "$(whoami)" "${MNTPT}"
        # create mirror snapshots under I/O every few seconds
        take_mirror_snapshots "${CLUSTER1}" "${POOL}" "${IMG}" &
        SNAP_PIDS+=($!)
        slow_untar_workload "${MNTPT}" &
        WORKLOAD_PIDS+=($!)
    done
    # "wait ... || RET=$?" records a failure without tripping set -e, so all
    # jobs of a group are reaped before the script gives up
    for pid in "${SNAP_PIDS[@]}"; do
        wait "${pid}" || RET=$?
    done
    if ((RET != 0)); then
        echo "take_mirror_snapshots failed"
        exit 1
    fi
    for pid in "${WORKLOAD_PIDS[@]}"; do
        wait "${pid}" || RET=$?
    done
    if ((RET != 0)); then
        echo "slow_untar_workload failed"
        exit 1
    fi

    # fail over and verify all images in parallel
    for j in {1..10}; do
        compare_demoted_promoted_image "${j}" &
        COMPARE_PIDS+=($!)
    done
    for pid in "${COMPARE_PIDS[@]}"; do
        wait "${pid}" || RET=$?
    done
    if ((RET != 0)); then
        echo "compare_demoted_promoted_image failed"
        exit 1
    fi

    for j in {1..10}; do
        IMG=${IMG_PREFIX}${j}
        # Allow for removal of non-primary image by checking that mirroring
        # image status is "up+replaying"
        wait_for_replaying_status_in_pool_dir "${CLUSTER1}" "${POOL}" "${IMG}"
        remove_image "${CLUSTER2}" "${POOL}" "${IMG}"
        wait_for_image_removal "${CLUSTER1}" "${POOL}" "${IMG}"
        # ":?" aborts instead of deleting "${j}" alone should the prefix be empty
        rm -rf "${MNTPT_PREFIX:?}${j}"
    done
done

echo OK

0 commit comments

Comments
 (0)