11#! /usr/bin/env bash
22
3- # version 2023-10-30
3+ # version 2024-03-04
44
55# rgw-restore-bucket-index is an EXPERIMENTAL tool to use in case
66# bucket index entries for objects in the bucket are somehow lost. It
1313# Because this script must process json objects, the `jq` tool must be
1414# installed on the system.
1515#
16- # Usage: $0 [--proceed] <bucket-name> [data-pool-name]
16+ # Usage: see the usage() function below for details
1717#
1818# This tool is designed to be interactive, allowing the user to
1919# examine the list of objects to be reindexed before
@@ -27,31 +27,64 @@ export TOP_PID=$$
2727# relies on this ordering
2828export LC_ALL=C
2929
30- # temporary files
31- export bkt_entry=/tmp/rgwrbi-bkt-entry.$$
32- export bkt_inst=/tmp/rgwrbi-bkt-inst.$$
33- export marker_ls=/tmp/rgwrbi-marker-ls.$$
34- export obj_list=/tmp/rgwrbi-object-list.$$
35- export obj_list_ver=/tmp/rgwrbi-object-list-ver.$$
36- export obj_reindex_script=/tmp/rgwrbi-object-list-script.$$
37- export zone_info=/tmp/rgwrbi-zone-info.$$
38- export olh_info_enc=/tmp/rgwrbi-olh-info-enc.$$
39- export olh_info_json=/tmp/rgwrbi-olh-info-json.$$
40- export debug_log=/tmp/rgwrbi-debug-log.$$
41-
30+ # whether or not the temporary files are cleaned on completion
4231export clean_temps=1
4332
4433# make explicit tabs easier to see in code
4534export TAB=" "
4635
4736
37+ #
38+ # helper functions
39+ #
40+
# Terminates the whole script from any nesting level (function,
# pipeline stage or subshell) by sending SIGTERM to the top-level
# shell, whose PID is exported as TOP_PID.
# Globals: TOP_PID (read)
super_exit () {
  # Signal the whole process group first so that children of the
  # top-level shell receive the signal as well; "--" keeps the negative
  # PID from being parsed as an option. When TOP_PID is not a process
  # group leader that attempt fails, so fall back to signalling only
  # the top-level shell itself.
  kill -s TERM -- "-${TOP_PID}" 2> /dev/null ||
    kill -s TERM "${TOP_PID}"
}
44+
# Prints the command-line help text to stderr and then calls
# super_exit to terminate the script.
# Outputs: help text on stderr; nothing on stdout
usage () {
  >&2 cat << EOF

Usage: $0 -b <bucket-name> [-l <rados-ls-file>] [-r <realm-name>]
          [-g <zonegroup-name>] [-z <zone-name>] [-p <pool>]
          [-t <tmp-dir>] [-y] [-d]

where:
  -b <bucket-name>     Required - name of the bucket to operate on
  -l <rados-ls-file>   Optional - file containing the output of 'rados ls -p <pool>'
  -r <realm-name>      Optional - specify the realm if not applying to the default realm
  -g <zonegroup-name>  Optional - specify the zonegroup if not applying to the default zonegroup
  -z <zone-name>       Optional - specify the zone if not applying to the default zone
  -p <pool>            Optional - data pool; if not provided will be inferred from bucket and zone information
  -t <tmp-dir>         Optional - specify a directory for temporary files other than the default of /tmp
  -y                   Optional - proceed with restoring without confirming with the user
                       USE WITH CAUTION.
  -d                   Optional - run with debugging output
EOF
  super_exit
}
64+
# Removes the script's temporary files, but only while clean_temps is
# 1 (the option handling that enables debug output also sets it to 0
# so the files can be inspected afterwards).
# Globals: clean_temps, bkt_entry, temp_file_list, zone_info,
#          olh_info_enc, olh_info_json (all read)
clean () {
  if [[ "$clean_temps" == 1 ]]; then
    # temp_file_list is a single space-separated string, so it is left
    # unquoted on purpose to be split into individual paths. The
    # ${var:+...} form drops names that are still unset or empty, e.g.
    # when the script is terminated before the temp files are defined.
    # The trailing backslash matters: without it the second line would
    # be executed as a command instead of being passed to rm.
    # shellcheck disable=SC2086
    rm -f -- ${bkt_entry:+"$bkt_entry"} $temp_file_list \
      ${zone_info:+"$zone_info"} \
      ${olh_info_enc:+"$olh_info_enc"} \
      ${olh_info_json:+"$olh_info_json"}
  fi
}
5472
# Stops the script when the partition holding the temporary directory
# has run out of space or inodes, since output written there would be
# truncated.
# Globals: temp_dir (read)
# Outputs: diagnostics on stderr, only when the partition is full
# Calls super_exit when full; otherwise returns quietly.
test_temp_space () {
  local pcent ipcent

  # df prints a header line followed by the value (e.g. " 42%"); keep
  # the last line and strip blanks and the percent sign so that only
  # the bare number remains
  pcent=$(df -k --output=pcent -- "$temp_dir" | tail -1 | tr -d ' %')
  ipcent=$(df -k --output=ipcent -- "$temp_dir" | tail -1 | tr -d ' %')

  # Compare as strings rather than with -eq: when df reports a
  # non-numeric value (such as "-" for inode usage) or nothing at all,
  # a numeric test would fail with "integer expression expected".
  if [[ "$pcent" == 100 || "$ipcent" == 100 ]]; then
    >&2 echo "ERROR: the temporary directory's partition is full, preventing continuation."
    >&2 echo "NOTE: the temporary directory is \"${temp_dir}\"."
    >&2 df -h --output="target,used,avail,pcent,iused,iavail,ipcent" -- "$temp_dir"
    >&2 echo "NOTE: cleaning temporary files before exiting...."
    super_exit
  fi
}
87+
5588# number of seconds for a bucket index pending op to be completed via
5689# dir_suggest mechanism
5790export pending_op_secs=120
@@ -88,33 +121,6 @@ if [ "$exit_code" -ne 0 ] ;then
88121 exit $exit_code
89122fi
90123
91- #
92- # helper functions
93- #
94-
95- super_exit () {
96- kill -s TERM $TOP_PID
97- }
98-
99- usage () {
100- >&2 cat << EOF
101-
102- Usage: $0 -b <bucket-name> [-l <rados-ls-file>] [-p <pool>] [-y]
103-
104- where:
105- -b <bucket-name> Required - name of the bucket to operate on
106- -l <rados-ls-file> Optional - file containing the output of 'rados ls -p <pool>'
107- -r <realm-name> Optional - specify the realm if not applying to the default realm"
108- -g <zonegroup-name> Optional - specify the zonegroup if not applying to the default zonegroup"
109- -z <zone-name> Optional - specify the zone if not applying to the default zone"
110- -p <pool> Optional - data pool; if not provided will be inferred from bucket and zone information
111- -y Optional - proceed with restoring without confirming with the user
112- USE WITH CAUTION.
113- -d Optional - run with debugging output
114- EOF
115- super_exit
116- }
117-
118124# Determines the name of the data pool. Expects the optional
119125# command-line argument to appear as $1 if there is one. The
120126# command-line has the highest priority, then the "explicit_placement"
@@ -137,6 +143,7 @@ get_pool() {
137143 fi
138144
139145 radosgw-admin zone get $multisite_spec > $zone_info 2> /dev/null
146+ test_temp_space
140147 pool=$( jq -r " .placement_pools [] | select(.key | contains(\" ${plmt_pool} \" )) .val .storage_classes.${plmt_class} .data_pool" $zone_info )
141148
142149 if [ -z " $pool " ] ; then
@@ -147,12 +154,13 @@ get_pool() {
147154}
148155
149156export bucket=" "
157+ export temp_dir=/tmp
150158pool=" "
151159multisite_spec=" "
152160lsoutput=" "
153161debug=0
154162
155- while getopts " b:l:p:r:g:z:yd " o; do
163+ while getopts " b:l:p:r:g:z:ydt: " o; do
156164 case " ${o} " in
157165 b)
158166 bucket=" ${OPTARG} "
@@ -187,6 +195,9 @@ while getopts "b:l:p:r:g:z:yd" o; do
187195 debug=1
188196 clean_temps=0
189197 ;;
198+ t)
199+ temp_dir=" ${OPTARG} "
200+ ;;
190201 * )
191202 echo
192203 usage
@@ -202,6 +213,24 @@ else
202213 export debugging_rgwadmin=" 2>/dev/null "
203214fi
204215
216+ if [ ! -d " $temp_dir " ] ; then
217+ echo " ERROR: temporary directory $temp_dir is not a directory"
218+ exit 1
219+ fi
220+
221+ # temporary files
222+ export bkt_entry=${temp_dir} /rgwrbi-bkt-entry.$$
223+ export bkt_inst=${temp_dir} /rgwrbi-bkt-inst.$$
224+ export marker_ls=${temp_dir} /rgwrbi-marker-ls.$$
225+ export obj_list=${temp_dir} /rgwrbi-object-list.$$
226+ export obj_list_ver=${temp_dir} /rgwrbi-object-list-ver.$$
227+ export zone_info=${temp_dir} /rgwrbi-zone-info.$$
228+ export olh_info_enc=${temp_dir} /rgwrbi-olh-info-enc.$$
229+ export olh_info_json=${temp_dir} /rgwrbi-olh-info-json.$$
230+ export debug_log=${temp_dir} /rgwrbi-debug-log.$$
231+
232+ export temp_file_list=" $bkt_entry $bkt_inst $marker_ls $obj_list $obj_list_ver $zone_info $olh_info_enc $olh_info_json "
233+
205234# special code path for versioned buckets
206235handle_versioned () {
207236 while read o ; do
@@ -213,7 +242,9 @@ handle_versioned() {
213242
214243 # process OLH object; determine final instance or delete-marker
215244 rados -p $pool getxattr $olh_obj user.rgw.olh.info --object-locator " $olh_loc " > $olh_info_enc
245+ test_temp_space
216246 ceph-dencoder import $olh_info_enc type RGWOLHInfo decode dump_json > $olh_info_json
247+ test_temp_space
217248 last_instance=$( jq -r " .target.key.instance" $olh_info_json )
218249 if [ -z " $last_instance " ] ; then
219250 # filters out entry without an instance
@@ -228,6 +259,7 @@ handle_versioned() {
228259 echo " last instance is $last_instance "
229260 echo " filter_out_last_instance is $filter_out_last_instance "
230261 fi >> $debug_log
262+ test_temp_space
231263
232264 # we currently don't need the delete marker, but we can have access to it
233265 # delete_marker=$(jq -r ".removed" $olh_info_json) # true or false
@@ -237,6 +269,7 @@ handle_versioned() {
237269 if [ " $debug " == 1 ] ; then
238270 echo " obj=$obj ; loc=$loc " >> $debug_log
239271 fi
272+ test_temp_space
240273 rados -p $pool stat2 $obj --object-locator " $loc "
241274 done | # output of stat2, which includes mtime
242275 sort -k 3 | # stat2 but sorted by mtime earlier to later
@@ -255,6 +288,8 @@ handle_versioned() {
255288 -e " s/^/${o} \t/"
256289 echo " ${o}${TAB} $last_instance " # now add the final instance; could be delete marker
257290 done < $obj_list 2> /dev/null | sed ' s/\t$//' > $obj_list_ver
291+ test_temp_space
292+
258293} # handle_versioned
259294
260295if [ -z " $bucket " ]; then
265300
266301# read bucket entry metadata
267302eval " radosgw-admin metadata get bucket:$bucket $debugging_rgwadmin $multisite_spec >$bkt_entry "
303+ test_temp_space
268304export marker=$( jq -r " .data.bucket.marker" $bkt_entry )
269305export bucket_id=$( jq -r " .data.bucket.bucket_id" $bkt_entry )
270306if [ -z " $marker " -o -z " $bucket_id " ] ; then
@@ -282,6 +318,7 @@ echo bucket_id is $bucket_id
282318
283319# read bucket instance metadata
284320eval " radosgw-admin metadata get bucket.instance:${bucket} :$bucket_id $multisite_spec $debugging_rgwadmin >$bkt_inst "
321+ test_temp_space
285322
286323# examine number of bucket index shards
287324num_shards=$( jq " .data.bucket_info.num_shards" $bkt_inst )
309346# single.
310347if [ -z " $lsoutput " ]; then
311348 ( rados -p $pool ls | grep " ^${marker} _" > $marker_ls ) 2> /dev/null
349+ test_temp_space
312350else
313351 ( grep " ^${marker} _" " ${lsoutput} " > $marker_ls ) 2> /dev/null
352+ test_temp_space
314353fi
315354
316355( sed -E ' s/\t.*//' $marker_ls | grep -v -E " ^${marker} __[^_]+_" | sed -E " s/^${marker} _(.*)/\1/" | sed ' s/^__/_/' > $obj_list ) 2> /dev/null
356+ test_temp_space
317357
318358# mask bit indicating it's a versioned bucket
319359export is_versioned=$(( $bkt_flags & 2 ))
0 commit comments