Skip to content

Commit 4a5715f

Browse files
authored
Merge pull request ceph#59942 from ronen-fr/wip-rf-store2-steps
osd/scrub: separate shallow vs deep errors storage Reviewed-by: Samuel Just <[email protected]>
2 parents 4298b7e + 4f1ef85 commit 4a5715f

File tree

8 files changed

+875
-114
lines changed

8 files changed

+875
-114
lines changed

qa/standalone/scrub/osd-scrub-repair.sh

Lines changed: 248 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,6 @@ function TEST_auto_repair_bluestore_basic() {
442442
['pool_name']="testpool"
443443
['extras']=" --osd_scrub_auto_repair=true"
444444
)
445-
local extr_dbg=3
446445
standard_scrub_cluster $dir cluster_conf
447446
local poolid=${cluster_conf['pool_id']}
448447
local poolname=${cluster_conf['pool_name']}
@@ -6252,6 +6251,254 @@ function TEST_request_scrub_priority() {
62526251
grep "log_channel.*scrub ok" $dir/osd.${primary}.log | grep -v purged_snaps | head -1 | sed 's/.*[[]DBG[]]//' | grep -q $pg || return 1
62536252
}
62546253

6254+
#
6255+
# Testing the "split scrub store" feature: shallow scrubs do not
6256+
# purge deep errors from the store.
6257+
#
6258+
# Corrupt one copy of a replicated pool, creating both shallow and deep errors.
6259+
# Then shallow-scrub the pool and verify that the deep errors are still present.
6260+
#
6261+
function TEST_dual_store_replicated_cluster() {
6262+
local dir=$1
6263+
local poolname=csr_pool
6264+
local total_objs=19
6265+
local extr_dbg=1 # note: 3 and above leave some temp files around
6266+
6267+
run_mon $dir a --osd_pool_default_size=2 || return 1
6268+
run_mgr $dir x --mgr_stats_period=1 || return 1
6269+
local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 "
6270+
ceph_osd_args+="--osd_scrub_backoff_ratio=0 --osd_stats_update_period_not_scrubbing=3 "
6271+
ceph_osd_args+="--osd_stats_update_period_scrubbing=2 --osd_op_queue=wpq --osd_scrub_auto_repair=0 "
6272+
for osd in $(seq 0 1)
6273+
do
6274+
run_osd $dir $osd $ceph_osd_args || return 1
6275+
done
6276+
6277+
create_rbd_pool || return 1
6278+
wait_for_clean || return 1
6279+
6280+
create_pool foo 1 || return 1
6281+
create_pool $poolname 1 1 || return 1
6282+
wait_for_clean || return 1
6283+
6284+
ceph osd pool set $poolname noscrub 1
6285+
ceph osd pool set $poolname nodeep-scrub 1
6286+
6287+
for i in $(seq 1 $total_objs) ; do
6288+
objname=ROBJ${i}
6289+
add_something $dir $poolname $objname || return 1
6290+
6291+
rados --pool $poolname setomapheader $objname hdr-$objname || return 1
6292+
rados --pool $poolname setomapval $objname key-$objname val-$objname || return 1
6293+
done
6294+
6295+
# Grow the last object to 1 MB + 1 KB
6296+
dd if=/dev/zero of=$dir/new.ROBJ19 bs=1024 count=1025
6297+
rados --pool $poolname put $objname $dir/new.ROBJ19 || return 1
6298+
rm -f $dir/new.ROBJ19
6299+
6300+
local pg=$(get_pg $poolname ROBJ0)
6301+
local primary=$(get_primary $poolname ROBJ0)
6302+
6303+
# Compute an old omap digest and save oi
6304+
CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) \
6305+
config set osd_deep_scrub_update_digest_min_age 0
6306+
CEPH_ARGS='' ceph daemon $(get_asok_path osd.1) \
6307+
config set osd_deep_scrub_update_digest_min_age 0
6308+
pg_deep_scrub $pg
6309+
6310+
for i in $(seq 1 $total_objs) ; do
6311+
objname=ROBJ${i}
6312+
6313+
# Alternate corruption between osd.0 and osd.1
6314+
local osd=$(expr $i % 2)
6315+
6316+
case $i in
6317+
1)
6318+
# Size (deep scrub data_digest too)
6319+
local payload=UVWXYZZZ
6320+
echo $payload > $dir/CORRUPT
6321+
objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1
6322+
;;
6323+
6324+
2)
6325+
# digest (deep scrub only)
6326+
local payload=UVWXYZ
6327+
echo $payload > $dir/CORRUPT
6328+
objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1
6329+
;;
6330+
6331+
3)
6332+
# missing
6333+
objectstore_tool $dir $osd $objname remove || return 1
6334+
;;
6335+
6336+
4)
6337+
# Modify omap value (deep scrub only)
6338+
objectstore_tool $dir $osd $objname set-omap key-$objname $dir/CORRUPT || return 1
6339+
;;
6340+
6341+
5)
6342+
# Delete omap key (deep scrub only)
6343+
objectstore_tool $dir $osd $objname rm-omap key-$objname || return 1
6344+
;;
6345+
6346+
6)
6347+
# Add extra omap key (deep scrub only)
6348+
echo extra > $dir/extra-val
6349+
objectstore_tool $dir $osd $objname set-omap key2-$objname $dir/extra-val || return 1
6350+
rm $dir/extra-val
6351+
;;
6352+
6353+
7)
6354+
# Modify omap header (deep scrub only)
6355+
echo -n newheader > $dir/hdr
6356+
objectstore_tool $dir $osd $objname set-omaphdr $dir/hdr || return 1
6357+
rm $dir/hdr
6358+
;;
6359+
6360+
8)
6361+
rados --pool $poolname setxattr $objname key1-$objname val1-$objname || return 1
6362+
rados --pool $poolname setxattr $objname key2-$objname val2-$objname || return 1
6363+
6364+
# Break xattrs
6365+
echo -n bad-val > $dir/bad-val
6366+
objectstore_tool $dir $osd $objname set-attr _key1-$objname $dir/bad-val || return 1
6367+
objectstore_tool $dir $osd $objname rm-attr _key2-$objname || return 1
6368+
echo -n val3-$objname > $dir/newval
6369+
objectstore_tool $dir $osd $objname set-attr _key3-$objname $dir/newval || return 1
6370+
rm $dir/bad-val $dir/newval
6371+
;;
6372+
6373+
9)
6374+
objectstore_tool $dir $osd $objname get-attr _ > $dir/robj9-oi
6375+
echo -n D > $dir/change
6376+
rados --pool $poolname put $objname $dir/change
6377+
objectstore_tool $dir $osd $objname set-attr _ $dir/robj9-oi
6378+
rm $dir/oi $dir/change
6379+
;;
6380+
6381+
# ROBJ10 must be handled after digests are re-computed by a deep scrub below
6382+
# ROBJ11 must be handled with config change before deep scrub
6383+
# ROBJ12 must be handled with config change before scrubs
6384+
# ROBJ13 must be handled before scrubs
6385+
6386+
14)
6387+
echo -n bad-val > $dir/bad-val
6388+
objectstore_tool $dir 0 $objname set-attr _ $dir/bad-val || return 1
6389+
objectstore_tool $dir 1 $objname rm-attr _ || return 1
6390+
rm $dir/bad-val
6391+
;;
6392+
6393+
15)
6394+
objectstore_tool $dir $osd $objname rm-attr _ || return 1
6395+
;;
6396+
6397+
16)
6398+
objectstore_tool $dir 0 $objname rm-attr snapset || return 1
6399+
echo -n bad-val > $dir/bad-val
6400+
objectstore_tool $dir 1 $objname set-attr snapset $dir/bad-val || return 1
6401+
;;
6402+
6403+
17)
6404+
# Deep-scrub only (all replicas are different from the object info)
6405+
local payload=ROBJ17
6406+
echo $payload > $dir/new.ROBJ17
6407+
objectstore_tool $dir 0 $objname set-bytes $dir/new.ROBJ17 || return 1
6408+
objectstore_tool $dir 1 $objname set-bytes $dir/new.ROBJ17 || return 1
6409+
;;
6410+
6411+
18)
6412+
# Deep-scrub only (all replicas are different from the object info)
6413+
local payload=ROBJ18
6414+
echo $payload > $dir/new.ROBJ18
6415+
objectstore_tool $dir 0 $objname set-bytes $dir/new.ROBJ18 || return 1
6416+
objectstore_tool $dir 1 $objname set-bytes $dir/new.ROBJ18 || return 1
6417+
# Make one replica have a different object info, so a full repair must happen too
6418+
objectstore_tool $dir $osd $objname corrupt-info || return 1
6419+
;;
6420+
6421+
19)
6422+
# Set osd-max-object-size smaller than this object's size
6423+
6424+
esac
6425+
done
6426+
6427+
local pg=$(get_pg $poolname ROBJ0)
6428+
6429+
ceph tell osd.\* injectargs -- --osd-max-object-size=1048576
6430+
6431+
inject_eio rep data $poolname ROBJ11 $dir 0 || return 1 # shard 0 of [1, 0], osd.1
6432+
inject_eio rep mdata $poolname ROBJ12 $dir 1 || return 1 # shard 1 of [1, 0], osd.0
6433+
inject_eio rep data $poolname ROBJ13 $dir 0 || return 1 # shard 0 of [1, 0], osd.1
6434+
6435+
# first sequence: the final shallow scrub should not override any of the deep errors
6436+
pg_scrub $pg
6437+
(( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | python3 -c "$sortkeys" | jq '.' > /tmp/WQR_1.json
6438+
pg_scrub $pg
6439+
(( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | python3 -c "$sortkeys" | jq '.' > /tmp/WQR_1b.json
6440+
rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/sh1_results.json
6441+
(( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \
6442+
python3 -c "$sortkeys" > /tmp/WQR_1b_s.json
6443+
6444+
pg_deep_scrub $pg
6445+
(( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | python3 -c "$sortkeys" | jq '.' > /tmp/WQR_2.json
6446+
rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/dp_results.json
6447+
(( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \
6448+
python3 -c "$sortkeys" > /tmp/WQR_2s.json
6449+
6450+
pg_scrub $pg
6451+
(( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | python3 -c "$sortkeys" | jq '.' > /tmp/WQR_3.json
6452+
rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/sh2_results.json
6453+
(( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \
6454+
python3 -c "$sortkeys" > /tmp/WQR_3s.json
6455+
6456+
diff -u $dir/dp_results.json $dir/sh2_results.json || return 1
6457+
6458+
# inject a read error, which is a special case: the scrub encountering the read error
6459+
# would override the previously collected shard info.
6460+
inject_eio rep mdata $poolname ROBJ13 $dir 1 || return 1 # shard 1 of [1, 0], osd.0
6461+
6462+
pg_deep_scrub $pg
6463+
6464+
(( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | python3 -c "$sortkeys" | jq '.' > /tmp/WQR_4.json
6465+
(( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \
6466+
python3 -c "$sortkeys" > /tmp/WQR_4s_w13.json
6467+
(( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | \
6468+
jq 'del(.inconsistents[] | select(.object.name == "ROBJ13"))' | \
6469+
jq '.inconsistents' | python3 -c "$sortkeys" > /tmp/WQR_4s_wo13.json
6470+
6471+
rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \
6472+
python3 -c "$sortkeys" > $dir/dpPart2_w13_results.json
6473+
# Remove the entry with "name":"ROBJ13" from the $dir/d*_results.json
6474+
rados list-inconsistent-obj $pg | jq "$jqfilter" | jq 'del(.inconsistents[] | select(.object.name == "ROBJ13"))' | \
6475+
jq '.inconsistents' | python3 -c "$sortkeys" > $dir/dpPart2_wo13_results.json
6476+
(( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \
6477+
python3 -c "$sortkeys" > /tmp/WQR_4s.json
6478+
6479+
pg_scrub $pg
6480+
6481+
(( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | python3 -c "$sortkeys" | jq '.' > /tmp/WQR_5.json
6482+
(( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \
6483+
python3 -c "$sortkeys" > /tmp/WQR_5s_w13.json
6484+
(( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | \
6485+
jq 'del(.inconsistents[] | select(.object.name == "ROBJ13"))' |\
6486+
jq '.inconsistents' | python3 -c "$sortkeys" > /tmp/WQR_5s_wo13.json
6487+
6488+
rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | python3 -c "$sortkeys" > \
6489+
$dir/sh2Part2_w13_results.json
6490+
rados list-inconsistent-obj $pg | jq "$jqfilter" | jq 'del(.inconsistents[] | select(.object.name == "ROBJ13"))' |\
6491+
jq '.inconsistents' | python3 -c "$sortkeys" > $dir/shPart2_wo13_results.json
6492+
6493+
# the shallow scrub results should differ from the results of the deep
6494+
# scrub preceding it, but the difference should be limited to ROBJ13
6495+
diff -u $dir/dpPart2_w13_results.json $dir/sh2Part2_w13_results.json && return 1
6496+
diff -u $dir/dpPart2_wo13_results.json $dir/shPart2_wo13_results.json || return 1
6497+
6498+
ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it
6499+
return 0
6500+
}
6501+
62556502

62566503
main osd-scrub-repair "$@"
62576504

src/common/map_cacher.hpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#define MAPCACHER_H
1717

1818
#include "include/Context.h"
19+
#include "include/expected.hpp"
1920
#include "common/sharedptr_registry.hpp"
2021

2122
namespace MapCacher {
@@ -130,6 +131,50 @@ class MapCacher {
130131
return -EINVAL;
131132
} ///< @return error value, 0 on success, -ENOENT if no more entries
132133

134+
/// Fetch the first key/value pair after the specified key
135+
struct PosAndData {
136+
K last_key;
137+
V data;
138+
};
139+
using MaybePosAndData = tl::expected<PosAndData, int>;
140+
141+
MaybePosAndData get_1st_after_key(
142+
K key ///< [in] key after which to get next
143+
)
144+
{
145+
ceph_assert(driver);
146+
while (true) {
147+
std::pair<K, boost::optional<V>> cached;
148+
bool got_cached = in_progress.get_next(key, &cached);
149+
150+
///\todo a driver->get_next() that returns an expected<K, V> would be nice
151+
bool got_store{false};
152+
std::pair<K, V> store;
153+
int r = driver->get_next(key, &store);
154+
if (r < 0 && r != -ENOENT) {
155+
return tl::unexpected(r);
156+
} else if (r == 0) {
157+
got_store = true;
158+
}
159+
160+
if (!got_cached && !got_store) {
161+
return tl::unexpected(-ENOENT);
162+
} else if (got_cached && (!got_store || store.first >= cached.first)) {
163+
if (cached.second) {
164+
return PosAndData{cached.first, *cached.second};
165+
} else {
166+
key = cached.first;
167+
continue; // value was cached as removed, recurse
168+
}
169+
} else {
170+
return PosAndData{store.first, store.second};
171+
}
172+
}
173+
ceph_abort(); // not reachable
174+
return tl::unexpected(-EINVAL);
175+
}
176+
177+
133178
/// Adds operation setting keys to Transaction
134179
void set_keys(
135180
const std::map<K, V> &keys, ///< [in] keys/values to std::set

src/common/scrub_types.cc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,13 @@ void inconsistent_obj_wrapper::encode(bufferlist& bl) const
161161
ENCODE_FINISH(bl);
162162
}
163163

164+
bufferlist inconsistent_obj_wrapper::encode() const
165+
{
166+
bufferlist bl;
167+
encode(bl);
168+
return bl;
169+
}
170+
164171
void inconsistent_obj_wrapper::decode(bufferlist::const_iterator& bp)
165172
{
166173
DECODE_START(2, bp);
@@ -240,6 +247,13 @@ void inconsistent_snapset_wrapper::encode(bufferlist& bl) const
240247
ENCODE_FINISH(bl);
241248
}
242249

250+
bufferlist inconsistent_snapset_wrapper::encode() const
251+
{
252+
bufferlist bl;
253+
encode(bl);
254+
return bl;
255+
}
256+
243257
void inconsistent_snapset_wrapper::decode(bufferlist::const_iterator& bp)
244258
{
245259
DECODE_START(2, bp);

src/common/scrub_types.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ struct inconsistent_obj_wrapper : librados::inconsistent_obj_t {
152152
const pg_shard_t &primary);
153153
void set_version(uint64_t ver) { version = ver; }
154154
void encode(ceph::buffer::list& bl) const;
155+
ceph::buffer::list encode() const;
155156
void decode(ceph::buffer::list::const_iterator& bp);
156157
};
157158

@@ -181,6 +182,7 @@ struct inconsistent_snapset_wrapper : public librados::inconsistent_snapset_t {
181182
void set_size_mismatch();
182183

183184
void encode(ceph::buffer::list& bl) const;
185+
ceph::buffer::list encode() const;
184186
void decode(ceph::buffer::list::const_iterator& bp);
185187
};
186188

0 commit comments

Comments
 (0)