|
#!/bin/bash

# -e: abort on the first failing command; -x: trace every command.
set -ex

# Defaults. main's option parsing can override FIRST_DAMAGE, FS,
# METADATA_POOL, MOUNT and PYTHON; DATA_POOL has no command-line option.
FIRST_DAMAGE="first-damage.py"   # script handed to $PYTHON by recover
FS=cephfs                        # file system name
METADATA_POOL=cephfs.a.meta      # pool holding the directory omap objects
DATA_POOL=cephfs.a.data          # pool holding the file objects (backtrace xattr)
MOUNT=/mnt1                      # mount point
PYTHON=python3                   # interpreter used to run $FIRST_DAMAGE
| 11 | + |
# Print the command-line synopsis on stdout and terminate the script with
# exit status 1.
#
# The synopsis is prefixed with the script name ($0) and lists every long
# option that main accepts.
function usage {
  printf '%s: [--fs=<fs_name>] [--metadata-pool=<pool>] [--mount=<mount_point>] [--first-damage=</path/to/first-damage.py>] [--python=<interpreter>]\n' "$0"
  exit 1
}
| 16 | + |
| 17 | + |
# Create the test tree (dir1 and dir1/file1) in the current directory and
# record both inode numbers.
#
# Globals written:
#   DIR1_INODE        inode number of dir1, as printed by stat %i
#   DIR1_FILE1_INODE  inode number of dir1/file1, as printed by stat %i
function create {
  local dir=dir1
  local file="$dir/file1"

  # NOTE(review): presumably directory fragmentation is disabled so that dir1
  # stays in the single fragment-0 object that damage_backtrace addresses;
  # confirm against the MDS documentation.
  ceph config set mds mds_bal_fragment_dirs 0

  mkdir "$dir"
  DIR1_INODE=$(stat -c '%i' "$dir")

  touch "$file"
  DIR1_FILE1_INODE=$(stat -c '%i' "$file")
}
| 25 | + |
# Tell rank 0 of file system $FS to flush its journal.
#
# Globals read: FS
function flush {
  local rank0="mds.${FS}:0"
  ceph tell "$rank0" flush journal
}
| 29 | + |
# Damage the on-disk metadata of dir1/file1: delete its dentry ("file1_head")
# from dir1's omap in the metadata pool and delete the "parent" xattr (the
# backtrace) from the file's object in the data pool.
#
# The file system is failed for the duration of the change and made joinable
# again afterwards.
#
# Globals read: FS, METADATA_POOL, DATA_POOL, DIR1_INODE, DIR1_FILE1_INODE
# Returns: non-zero, before anything on the cluster is touched, when either
#          inode number is empty or is not a number.
function damage_backtrace {
  local dirfrag_obj file_obj

  # Work out both object names up front. An empty inode would otherwise be
  # formatted as "0.00000000" and the wrong object would be damaged.
  if [[ -z "$DIR1_INODE" || -z "$DIR1_FILE1_INODE" ]]; then
    printf 'damage_backtrace: DIR1_INODE and DIR1_FILE1_INODE must be set\n' >&2
    return 1
  fi
  # Declared separately from the assignment so a printf failure (for example
  # a non-numeric inode) is not masked by the exit status of `local`.
  dirfrag_obj=$(printf '%llx.%08llx' "$DIR1_INODE" 0) || return 1
  file_obj=$(printf '%llx.%08llx' "$DIR1_FILE1_INODE" 0) || return 1

  flush
  ceph fs fail "$FS"
  sleep 5

  cephfs-journal-tool --rank="$FS":0 event recover_dentries summary
  # The journal reset is required here: otherwise a flush would re-write the
  # omap entry that is deleted below.
  cephfs-journal-tool --rank="$FS":0 journal reset

  # remove dir1/file1 omap entry from metadata pool
  rados --pool="$METADATA_POOL" rmomapkey "$dirfrag_obj" "file1_head"

  # remove backtrace
  rados --pool="$DATA_POOL" rmxattr "$file_obj" "parent"

  ceph fs set "$FS" joinable true
  sleep 5
}
| 50 | + |
# Corrupt the head dentry of "lost+found" in the metadata pool.
#
# The omap value stored under key "lost+found_head" in object 1.00000000 is
# fetched into a temporary file, its first four bytes are overwritten with
# ff ff ff f0, and the result is written back. The file system is failed for
# the duration of the change and made joinable again afterwards.
#
# Globals read: FS, METADATA_POOL
# Returns: non-zero if the temp file cannot be created or any step of the
#          read-modify-write fails; the temp file is removed either way.
function damage_lost_found {
  local root_obj tmp rc=0

  flush
  ceph fs fail "$FS"
  sleep 5

  # Declared separately from the assignments so a failing printf/mktemp is
  # not masked by the exit status of `local`.
  root_obj=$(printf '%llx.%08llx' "1" 0) || return 1
  tmp=$(mktemp -p /tmp) || return 1

  # nuke head version of "lost+found"
  {
    rados --pool="$METADATA_POOL" getomapval "$root_obj" lost+found_head "$tmp" &&
      printf '\xff\xff\xff\xf0' | dd of="$tmp" count=4 bs=1 conv=notrunc,nocreat &&
      rados --pool="$METADATA_POOL" setomapval "$root_obj" lost+found_head --input-file="$tmp"
  } || rc=$?

  # Always clean up the scratch file, on success and on failure.
  rm -f -- "$tmp"
  if (( rc != 0 )); then
    return "$rc"
  fi

  ceph fs set "$FS" joinable true
  sleep 5
}
| 65 | + |
# Recover the file whose backtrace was damaged by running the
# cephfs-data-scan phases against the failed file system. Per the original
# author's note this also creates the lost+found directory.
#
# Globals read: FS
function recover_damaged_backtrace_file {
  local phase

  flush
  ceph fs fail "$FS"
  sleep 5

  cephfs-journal-tool --rank="$FS":0 journal reset

  # creates lost+found directory and recovers the damaged backtrace file
  for phase in cleanup init scan_extents scan_inodes scan_links; do
    cephfs-data-scan "$phase"
  done

  ceph fs set "$FS" joinable true
  sleep 5
}
| 83 | + |
# Run the first-damage script against the metadata pool while the file
# system is failed, then make the file system joinable again.
#
# The script is invoked three times with $PYTHON: once with no action flag,
# once with --repair-nosnap and once with --remove. Each pass writes its own
# debug and memo files under /tmp.
#
# Globals read: FS, METADATA_POOL, PYTHON, FIRST_DAMAGE
function recover {
  flush
  ceph fs fail "$FS"
  sleep 5

  cephfs-journal-tool --rank="$FS":0 event recover_dentries summary
  cephfs-journal-tool --rank="$FS":0 journal reset

  # FIRST_DAMAGE is quoted so a script path containing spaces or glob
  # characters reaches the interpreter as a single argument.
  "$PYTHON" "$FIRST_DAMAGE" --debug /tmp/debug1 --memo /tmp/memo1 "$METADATA_POOL"
  "$PYTHON" "$FIRST_DAMAGE" --debug /tmp/debug2 --memo /tmp/memo2 --repair-nosnap "$METADATA_POOL"
  "$PYTHON" "$FIRST_DAMAGE" --debug /tmp/debug3 --memo /tmp/memo3 --remove "$METADATA_POOL"

  ceph fs set "$FS" joinable true
  sleep 5
}
| 96 | + |
# Verify that lost+found exists in the current directory; terminate the
# script with exit status 2 when stat cannot find it.
function check_lost_found {
  if ! stat lost+found; then
    exit 2
  fi
}
# Verify that lost+found is absent from the current directory. When stat
# still finds it, print "should be gone" and terminate the script with exit
# status 1.
function check {
  # Nothing to report when the entry is gone.
  stat lost+found || return 0

  echo should be gone
  exit 1
}
| 106 | + |
# Mount the file system at $MOUNT with ceph-fuse (using ./ceph.conf from the
# current directory) and print the mount's disk usage.
#
# Note: inside this script the function shadows the system `mount` command.
#
# Globals read: MOUNT
function mount {
  #sudo --preserve-env=CEPH_CONF bin/mount.ceph :/ "$MOUNT" -o name=admin,noshare
  # Use $MOUNT rather than a literal path so the --mount option is honoured
  # and ceph-fuse and df agree on the mount point.
  sudo bin/ceph-fuse -c ./ceph.conf "$MOUNT"
  df -h "$MOUNT"
}
| 112 | + |
# Parse the command line, then run the whole scenario:
#   1. mount, create dir1/file1, unmount;
#   2. damage file1's dentry and backtrace, then recover it with
#      cephfs-data-scan and check that lost+found now exists;
#   3. damage the lost+found dentry, run the first-damage script, and check
#      that lost+found is gone.
#
# Options (all long, all optional): --help, --fs, --metadata-pool,
# --first-damage, --mount, --python.
#
# Globals written: FS, METADATA_POOL, MOUNT, FIRST_DAMAGE, PYTHON
function main {
  local parsed

  # getopt emits a shell-quoted argument list. It has to be passed to eval as
  # ONE quoted string, otherwise values are word-split and glob-expanded
  # before eval sees them. A getopt failure (unknown option, missing value)
  # ends the run via usage instead of silently starting the workflow.
  parsed=$(getopt --name "$0" --options '' --longoptions 'help,fs:,metadata-pool:,first-damage:,mount:,python:' -- "$@") || usage
  eval set -- "$parsed"

  while [ "$#" -gt 0 ]; do
    case "$1" in
      -h|--help)
        usage
        ;;
      --fs)
        FS="$2"
        shift 2
        ;;
      --metadata-pool)
        METADATA_POOL="$2"
        shift 2
        ;;
      --mount)
        MOUNT="$2"
        shift 2
        ;;
      --first-damage)
        FIRST_DAMAGE="$2"
        shift 2
        ;;
      --python)
        PYTHON="$2"
        shift 2
        ;;
      --)
        shift
        break
        ;;
      *)
        usage
        ;;
    esac
  done

  mount

  pushd "$MOUNT"
  create
  popd

  sudo umount -f "$MOUNT"

  # flush dentries/inodes to omap
  flush

  damage_backtrace
  # creates lost+found directory
  recover_damaged_backtrace_file

  sleep 5 # for mds to join
  mount
  pushd "$MOUNT"
  sleep 5 # wait for mount to complete

  # check lost+found is created
  check_lost_found
  popd
  sudo umount -f "$MOUNT"
  # flush dentries/inodes to omap
  flush

  # damage lost+found directory
  damage_lost_found
  recover

  mount

  pushd "$MOUNT"
  sleep 5 # wait for mount to complete

  # check 'lost+found' dentry should be gone
  check
  popd

  sudo umount -f "$MOUNT"
}
| 195 | + |
# Script entry point: hand the command-line arguments to main unchanged.
main "$@"
0 commit comments