#!/usr/bin/env bash
set -ex

# This testcase exercises the 'ceph fs subvolume ls' mgr command when the
# osd is full. The command used to miss a few subvolumes in the list. The
# issue occurs in a multi-active MDS setup. Please see the tracker:
# https://tracker.ceph.com/issues/72260
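# For reference, a healthy listing returns one JSON object per subvolume.
# Illustrative output shape (not captured from a live run):
#   $ ceph fs subvolume ls cephfs
#   [{"name": "sub_1"}, {"name": "sub_2"}, ..., {"name": "sub_25"}]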

# The suite sets the 'bluestore block size' to 2GiB, so the osd is 2GiB in
# size. 25 subvolumes are created and a 1GB file is written at the root of
# the mount. The full-ratios are set such that the osd is treated as full
# with less than 500MB of data. The subvolumes are then listed 20 times,
# with a mgr failover before each listing (to invalidate the readdir
# cache), and the count is validated each time.

SUBVOL_CNT=25

expect_failure() {
    if "$@"; then return 1; else return 0; fi
}
validate_subvol_cnt() {
    if [ "$1" -eq $SUBVOL_CNT ]; then return 0; else return 1; fi
}
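# A stricter alternative to grepping the pretty-printed JSON for the name
# prefix would be to count entries in the parsed listing. Sketch; assumes
# 'jq' is available on the test node. Defined for illustration only and
# not called below.
count_subvols_json() {
    ceph fs subvolume ls cephfs --format=json | jq 'length'
}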
restart_mgr() {
    ceph mgr fail x
    timeout=30
    while [ $timeout -gt 0 ]
    do
        active_mgr_cnt=$(ceph status | grep mgr | grep active | grep -v no | wc -l)
        if [ "$active_mgr_cnt" -eq 1 ]; then break; fi
        echo "Waiting for mgr to be active after failover: $timeout"
        sleep 1
        let "timeout-=1"
    done
}
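
# Note: 'x' above is the mgr id used by this suite. On a cluster with a
# differently named mgr, the active id could be looked up first, e.g.
# (sketch; assumes 'jq' and a single active mgr):
#   ceph mgr fail "$(ceph mgr dump --format=json | jq -r '.active_name')"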

#Set client_use_random_mds so client requests spread across the active MDS ranks
ceph config set client client_use_random_mds true

#Set max_mds to 3
ceph fs set cephfs max_mds 3
timeout=30
while [ $timeout -gt 0 ]
do
    active_cnt=$(ceph fs status | grep active | wc -l)
    if [ "$active_cnt" -eq 3 ]; then break; fi
    echo "Waiting for 3 active MDS daemons: $timeout"
    sleep 1
    let "timeout-=1"
done
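
# The grep-based count above is simple but loose; a structured check could
# parse the mdsmap instead. Sketch, assumes 'jq' (shown for illustration,
# not used by the test):
#   ceph fs status --format=json | jq '[.mdsmap[] | select(.state=="active")] | length'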

#Create subvolumes
for i in $(seq 1 $SUBVOL_CNT); do ceph fs subvolume create cephfs sub_$i; done

#For debugging
echo "Before write"
df -h
ceph osd df

sudo dd if=/dev/urandom of=$CEPH_MNT/1GB_file-1 status=progress bs=1M count=1000
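
# Note: bs=1M count=1000 writes ~1000MiB, i.e. roughly half of the 2GiB osd.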

# The suite (qa/suites/fs/full/tasks/mgr-osd-full.yaml) sets the 'bluestore
# block size' to 2GiB, so the osd is 2GiB in size. The full-ratios set below
# ensure that the osd is treated as full with less than 500MB of data.
ceph osd set-full-ratio 0.2
ceph osd set-nearfull-ratio 0.16
ceph osd set-backfillfull-ratio 0.18
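
# With a 2GiB osd these ratios work out to approximately:
#   full:         0.20 * 2048MiB ~= 410MiB
#   backfillfull: 0.18 * 2048MiB ~= 369MiB
#   nearfull:     0.16 * 2048MiB ~= 328MiB
# so the osd reports full well before 500MB of data is written.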

timeout=30
while [ $timeout -gt 0 ]
do
    health=$(ceph health detail)
    [[ $health = *"OSD_FULL"* ]] && echo "OSD is full" && break
    echo "Waiting for osd to be full: $timeout"
    sleep 1
    let "timeout-=1"
done

#For debugging
echo "After ratio set"
df -h
ceph osd df

#Clear the readdir cache by failing over the mgr, which forces a new
#libcephfs connection
#List and validate subvolumes 20 times
for i in {1..20}
do
    restart_mgr
    #List and validate the subvolume count
    subvol_cnt=$(ceph fs subvolume ls cephfs --format=json-pretty | grep sub_ | wc -l)
    validate_subvol_cnt "$subvol_cnt"
done
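
# Because the script runs with 'set -e', a non-zero return from
# validate_subvol_cnt (count mismatch) aborts the test immediately.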

#Delete all subvolumes
for i in $(seq 1 $SUBVOL_CNT); do ceph fs subvolume rm cephfs sub_$i; done

#Wait for the subvolume data to be purged
trashdir=$CEPH_MNT/volumes/_deleting
timeout=30
while [ $timeout -gt 0 ]
do
    [ -z "$(sudo ls -A $trashdir)" ] && echo "Trash directory $trashdir is empty" && break
    echo "Waiting for trash dir to be empty: $timeout"
    sleep 1
    let "timeout-=1"
done
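
# 'ceph fs subvolume rm' moves the subvolume into the volumes/_deleting
# trash directory and purges it asynchronously, hence the wait above.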

sudo rm -f $CEPH_MNT/1GB_file-1

#Set the ratios back for other full tests to run
ceph osd set-full-ratio 0.95
ceph osd set-nearfull-ratio 0.95
ceph osd set-backfillfull-ratio 0.95

#For debugging
echo "After test"
df -h
ceph osd df

echo OK