@@ -442,7 +442,6 @@ function TEST_auto_repair_bluestore_basic() {
442442 [' pool_name' ]=" testpool"
443443 [' extras' ]=" --osd_scrub_auto_repair=true"
444444 )
445- local extr_dbg=3
446445 standard_scrub_cluster $dir cluster_conf
447446 local poolid=${cluster_conf['pool_id']}
448447 local poolname=${cluster_conf['pool_name']}
@@ -6252,6 +6251,254 @@ function TEST_request_scrub_priority() {
62526251 grep " log_channel.*scrub ok" $dir /osd.${primary} .log | grep -v purged_snaps | head -1 | sed ' s/.*[[]DBG[]]//' | grep -q $pg || return 1
62536252}
62546253
#
# Private helpers for TEST_dual_store_replicated_cluster.
#
# Both expand the $sortkeys (python3 program text) and $jqfilter (jq program
# text) variables, which this test expects to be set elsewhere in the file.
#

# Write the complete 'rados list-inconsistent-obj' report of a PG, passed
# through the $sortkeys python program and re-formatted by jq, into a file.
#   $1 - PG id
#   $2 - output file
function _dual_store_dump_raw() {
    local pg=$1
    local out=$2

    rados list-inconsistent-obj $pg | python3 -c "$sortkeys" | jq '.' > "$out"
}

# Write the '.inconsistents' array of a PG's list-inconsistent-obj report,
# after applying $jqfilter and the $sortkeys python program, into a file.
#   $1 - PG id
#   $2 - output file
#   $3 - (optional) extra jq program, applied after $jqfilter and before the
#        '.inconsistents' array is extracted
function _dual_store_save_errors() {
    local pg=$1
    local out=$2
    local prefilter=${3:-}

    if [[ -n "$prefilter" ]]; then
        rados list-inconsistent-obj $pg | jq "$jqfilter" | jq "$prefilter" | \
            jq '.inconsistents' | python3 -c "$sortkeys" > "$out"
    else
        rados list-inconsistent-obj $pg | jq "$jqfilter" | \
            jq '.inconsistents' | python3 -c "$sortkeys" > "$out"
    fi
}

#
# Testing the "split scrub store" feature: shallow scrubs do not
# purge deep errors from the store.
#
# Corrupt one copy of a replicated pool, creating both shallow and deep errors.
# Then shallow-scrub the pool and verify that the deep errors are still present.
#
# Arguments: $1 - the test directory (cluster data and scratch files)
# Returns:   0 on success, 1 on the first failed step or failed comparison
#
function TEST_dual_store_replicated_cluster() {
    local dir=$1
    local poolname=csr_pool
    local total_objs=19
    local extr_dbg=1 # note: 3 and above leave some temp files around
    local big_obj=ROBJ${total_objs}
    # jq program removing the ROBJ13 entry from the list of inconsistent objects
    local drop_robj13='del(.inconsistents[] | select(.object.name == "ROBJ13"))'
    local i objname osd pg

    run_mon $dir a --osd_pool_default_size=2 || return 1
    run_mgr $dir x --mgr_stats_period=1 || return 1
    local -a osd_args=(
        --osd-scrub-interval-randomize-ratio=0
        --osd-deep-scrub-randomize-ratio=0
        --osd_scrub_backoff_ratio=0
        --osd_stats_update_period_not_scrubbing=3
        --osd_stats_update_period_scrubbing=2
        --osd_op_queue=wpq
        --osd_scrub_auto_repair=0
    )
    for osd in 0 1; do
        run_osd $dir $osd "${osd_args[@]}" || return 1
    done

    create_rbd_pool || return 1
    wait_for_clean || return 1

    create_pool foo 1 || return 1
    create_pool $poolname 1 1 || return 1
    wait_for_clean || return 1

    # scrubs are only to be started explicitly by this test
    ceph osd pool set $poolname noscrub 1
    ceph osd pool set $poolname nodeep-scrub 1

    for (( i = 1; i <= total_objs; i++ )); do
        objname=ROBJ${i}
        add_something $dir $poolname $objname || return 1

        rados --pool $poolname setomapheader $objname hdr-$objname || return 1
        rados --pool $poolname setomapval $objname key-$objname val-$objname || return 1
    done

    # Increase the size of the last object to 1 MB + 1KB (1025 blocks of 1024 bytes)
    dd if=/dev/zero of="$dir/new.$big_obj" bs=1024 count=1025 || return 1
    rados --pool $poolname put $big_obj "$dir/new.$big_obj" || return 1
    rm -f "$dir/new.$big_obj"

    pg=$(get_pg $poolname ROBJ0) || return 1

    # Compute an old omap digest and save oi
    for osd in 0 1; do
        CEPH_ARGS='' ceph daemon $(get_asok_path osd.$osd) \
            config set osd_deep_scrub_update_digest_min_age 0
    done
    pg_deep_scrub $pg

    for (( i = 1; i <= total_objs; i++ )); do
        objname=ROBJ${i}

        # Alternate corruption between osd.0 and osd.1
        osd=$(( i % 2 ))

        case $i in
        1)
            # Size (deep scrub data_digest too)
            echo UVWXYZZZ > "$dir/CORRUPT"
            objectstore_tool $dir $osd $objname set-bytes "$dir/CORRUPT" || return 1
            ;;

        2)
            # digest (deep scrub only)
            echo UVWXYZ > "$dir/CORRUPT"
            objectstore_tool $dir $osd $objname set-bytes "$dir/CORRUPT" || return 1
            ;;

        3)
            # missing
            objectstore_tool $dir $osd $objname remove || return 1
            ;;

        4)
            # Modify omap value (deep scrub only).
            # Re-uses the $dir/CORRUPT file written when handling ROBJ2.
            objectstore_tool $dir $osd $objname set-omap key-$objname "$dir/CORRUPT" || return 1
            ;;

        5)
            # Delete omap key (deep scrub only)
            objectstore_tool $dir $osd $objname rm-omap key-$objname || return 1
            ;;

        6)
            # Add extra omap key (deep scrub only)
            echo extra > "$dir/extra-val"
            objectstore_tool $dir $osd $objname set-omap key2-$objname "$dir/extra-val" || return 1
            rm "$dir/extra-val"
            ;;

        7)
            # Modify omap header (deep scrub only)
            echo -n newheader > "$dir/hdr"
            objectstore_tool $dir $osd $objname set-omaphdr "$dir/hdr" || return 1
            rm "$dir/hdr"
            ;;

        8)
            rados --pool $poolname setxattr $objname key1-$objname val1-$objname || return 1
            rados --pool $poolname setxattr $objname key2-$objname val2-$objname || return 1

            # Break xattrs
            echo -n bad-val > "$dir/bad-val"
            objectstore_tool $dir $osd $objname set-attr _key1-$objname "$dir/bad-val" || return 1
            objectstore_tool $dir $osd $objname rm-attr _key2-$objname || return 1
            echo -n val3-$objname > "$dir/newval"
            objectstore_tool $dir $osd $objname set-attr _key3-$objname "$dir/newval" || return 1
            rm "$dir/bad-val" "$dir/newval"
            ;;

        9)
            # Save the '_' attribute of one copy, overwrite the object, then
            # put the saved (now out-of-date) attribute back on that copy.
            objectstore_tool $dir $osd $objname get-attr _ > "$dir/robj9-oi" || return 1
            echo -n D > "$dir/change"
            rados --pool $poolname put $objname "$dir/change" || return 1
            objectstore_tool $dir $osd $objname set-attr _ "$dir/robj9-oi" || return 1
            rm "$dir/robj9-oi" "$dir/change"
            ;;

        # ROBJ10 must be handled after digests are re-computed by a deep scrub below
        # ROBJ11 must be handled with config change before deep scrub
        # ROBJ12 must be handled with config change before scrubs
        # ROBJ13 must be handled before scrubs

        14)
            # Damage the '_' attribute on both copies: garbage on osd.0, removed on osd.1
            echo -n bad-val > "$dir/bad-val"
            objectstore_tool $dir 0 $objname set-attr _ "$dir/bad-val" || return 1
            objectstore_tool $dir 1 $objname rm-attr _ || return 1
            rm "$dir/bad-val"
            ;;

        15)
            objectstore_tool $dir $osd $objname rm-attr _ || return 1
            ;;

        16)
            # Damage 'snapset' on both copies: removed on osd.0, garbage on osd.1
            objectstore_tool $dir 0 $objname rm-attr snapset || return 1
            echo -n bad-val > "$dir/bad-val"
            objectstore_tool $dir 1 $objname set-attr snapset "$dir/bad-val" || return 1
            rm "$dir/bad-val"
            ;;

        17)
            # Deep-scrub only (all replicas are different than the object info)
            echo ROBJ17 > "$dir/new.ROBJ17"
            objectstore_tool $dir 0 $objname set-bytes "$dir/new.ROBJ17" || return 1
            objectstore_tool $dir 1 $objname set-bytes "$dir/new.ROBJ17" || return 1
            ;;

        18)
            # Deep-scrub only (all replicas are different than the object info)
            echo ROBJ18 > "$dir/new.ROBJ18"
            objectstore_tool $dir 0 $objname set-bytes "$dir/new.ROBJ18" || return 1
            objectstore_tool $dir 1 $objname set-bytes "$dir/new.ROBJ18" || return 1
            # Make one replica have a different object info, so a full repair must happen too
            objectstore_tool $dir $osd $objname corrupt-info || return 1
            ;;

        19)
            # Nothing to corrupt here: osd-max-object-size is set smaller than
            # this object's size right after the loop.
            ;;
        esac
    done

    ceph tell osd.\* injectargs -- --osd-max-object-size=1048576

    inject_eio rep data $poolname ROBJ11 $dir 0 || return 1 # shard 0 of [1, 0], osd.1
    inject_eio rep mdata $poolname ROBJ12 $dir 1 || return 1 # shard 1 of [1, 0], osd.0
    inject_eio rep data $poolname ROBJ13 $dir 0 || return 1 # shard 0 of [1, 0], osd.1

    # first sequence: the final shallow scrub should not override any of the deep errors
    pg_scrub $pg
    if (( extr_dbg >= 3 )); then
        _dual_store_dump_raw $pg /tmp/WQR_1.json
    fi
    pg_scrub $pg
    if (( extr_dbg >= 3 )); then
        _dual_store_dump_raw $pg /tmp/WQR_1b.json
    fi
    _dual_store_save_errors $pg "$dir/sh1_results.json"
    if (( extr_dbg >= 3 )); then
        _dual_store_save_errors $pg /tmp/WQR_1b_s.json
    fi

    pg_deep_scrub $pg
    if (( extr_dbg >= 3 )); then
        _dual_store_dump_raw $pg /tmp/WQR_2.json
    fi
    _dual_store_save_errors $pg "$dir/dp_results.json"
    if (( extr_dbg >= 3 )); then
        _dual_store_save_errors $pg /tmp/WQR_2s.json
    fi

    pg_scrub $pg
    if (( extr_dbg >= 3 )); then
        _dual_store_dump_raw $pg /tmp/WQR_3.json
    fi
    _dual_store_save_errors $pg "$dir/sh2_results.json"
    if (( extr_dbg >= 3 )); then
        _dual_store_save_errors $pg /tmp/WQR_3s.json
    fi

    # the shallow scrub must not have changed what the deep scrub reported
    diff -u "$dir/dp_results.json" "$dir/sh2_results.json" || return 1

    # inject a read error, which is a special case: the scrub encountering the read error
    # would override the previously collected shard info.
    inject_eio rep mdata $poolname ROBJ13 $dir 1 || return 1 # shard 1 of [1, 0], osd.0

    pg_deep_scrub $pg

    if (( extr_dbg >= 3 )); then
        _dual_store_dump_raw $pg /tmp/WQR_4.json
        _dual_store_save_errors $pg /tmp/WQR_4s_w13.json
        _dual_store_save_errors $pg /tmp/WQR_4s_wo13.json "$drop_robj13"
    fi

    _dual_store_save_errors $pg "$dir/dpPart2_w13_results.json"
    # Same report, with the "name":"ROBJ13" entry removed
    _dual_store_save_errors $pg "$dir/dpPart2_wo13_results.json" "$drop_robj13"
    if (( extr_dbg >= 3 )); then
        _dual_store_save_errors $pg /tmp/WQR_4s.json
    fi

    pg_scrub $pg

    if (( extr_dbg >= 3 )); then
        _dual_store_dump_raw $pg /tmp/WQR_5.json
        _dual_store_save_errors $pg /tmp/WQR_5s_w13.json
        _dual_store_save_errors $pg /tmp/WQR_5s_wo13.json "$drop_robj13"
    fi

    _dual_store_save_errors $pg "$dir/sh2Part2_w13_results.json"
    _dual_store_save_errors $pg "$dir/shPart2_wo13_results.json" "$drop_robj13"

    # the shallow scrub results should differ from the results of the deep
    # scrub preceding it, but the difference should be limited to ROBJ13
    diff -u "$dir/dpPart2_w13_results.json" "$dir/sh2Part2_w13_results.json" && return 1
    diff -u "$dir/dpPart2_wo13_results.json" "$dir/shPart2_wo13_results.json" || return 1

    ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it
    return 0
}
6501+
62556502
# Script entry point: pass the suite name and all command-line arguments,
# each as its own unmodified word, to main (not defined in the visible part
# of this file).
main osd-scrub-repair "$@"
62576504
0 commit comments