Skip to content

Commit d27c19b

Browse files
committed
os/bluestore: Debug code to make reshard fail faster
Catch reshard on producing invalid result on encode. This makes it much easier to catch error. Intented for teuthology testing. Signed-off-by: Adam Kupczyk <[email protected]>
1 parent 12ffad5 commit d27c19b

File tree

3 files changed

+51
-12
lines changed

3 files changed

+51
-12
lines changed

src/common/options/global.yaml.in

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4895,6 +4895,14 @@ options:
48954895
desc: Preallocated buffer for inline shards
48964896
default: 256
48974897
with_legacy: true
4898+
- name: bluestore_debug_extent_map_encode_check
4899+
type: bool
4900+
level: dev
4901+
desc: Check correctness of extents in encode_some
4902+
default: false
4903+
with_legacy: false
4904+
flags:
4905+
- startup
48984906
- name: bluestore_cache_trim_interval
48994907
type: float
49004908
level: advanced

src/os/bluestore/BlueStore.cc

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3434,21 +3434,23 @@ void BlueStore::ExtentMap::dup_esb(BlueStore* b, TransContext* txc,
34343434
}
34353435

34363436
void BlueStore::ExtentMap::update(KeyValueDB::Transaction t,
3437-
bool force)
3437+
bool just_after_reshard)
34383438
{
34393439
auto cct = onode->c->store->cct; //used by dout
3440-
dout(20) << __func__ << " " << onode->oid << (force ? " force" : "") << dendl;
3440+
bool do_check = onode->c->store->debug_extent_map_encode_check;
3441+
dout(20) << __func__ << " " << onode->oid << (just_after_reshard ? " force" : "") << dendl;
34413442
if (onode->onode.extent_map_shards.empty()) {
34423443
if (inline_bl.length() == 0) {
34433444
unsigned n;
34443445
// we need to encode inline_bl to measure encoded length
3445-
bool never_happen = encode_some(0, OBJECT_MAX_SIZE, inline_bl, &n);
3446+
bool never_happen = encode_some(0, OBJECT_MAX_SIZE, inline_bl, &n,
3447+
do_check, do_check && just_after_reshard);
34463448
inline_bl.reassign_to_mempool(mempool::mempool_bluestore_inline_bl);
34473449
ceph_assert(!never_happen);
34483450
size_t len = inline_bl.length();
34493451
dout(20) << __func__ << " inline shard " << len << " bytes from " << n
34503452
<< " extents" << dendl;
3451-
if (!force && len > cct->_conf->bluestore_extent_map_shard_max_size) {
3453+
if (!just_after_reshard && len > cct->_conf->bluestore_extent_map_shard_max_size) {
34523454
request_reshard(0, OBJECT_MAX_SIZE);
34533455
return;
34543456
}
@@ -3486,11 +3488,11 @@ void BlueStore::ExtentMap::update(KeyValueDB::Transaction t,
34863488
encoded_shards.emplace_back(dirty_shard_t(&(*shard)));
34873489
bufferlist& bl = encoded_shards.back().bl;
34883490
if (encode_some(shard->shard_info->offset, endoff - shard->shard_info->offset,
3489-
bl, &shard->extents)) {
3490-
if (force) {
3491+
bl, &shard->extents, do_check, do_check && just_after_reshard)) {
3492+
if (just_after_reshard) {
34913493
_dump_extent_map<-1>(cct, *this);
34923494
derr << __func__ << " encode_some needs reshard" << dendl;
3493-
ceph_assert(!force);
3495+
ceph_assert(!just_after_reshard);
34943496
}
34953497
}
34963498
size_t len = bl.length();
@@ -3500,7 +3502,7 @@ void BlueStore::ExtentMap::update(KeyValueDB::Transaction t,
35003502
<< " bytes (was " << shard->shard_info->bytes << ") from "
35013503
<< shard->extents << " extents" << dendl;
35023504

3503-
if (!force) {
3505+
if (!just_after_reshard) {
35043506
if (len > cct->_conf->bluestore_extent_map_shard_max_size) {
35053507
// we are big; reshard ourselves
35063508
request_reshard(shard->shard_info->offset, endoff);
@@ -3924,7 +3926,9 @@ bool BlueStore::ExtentMap::encode_some(
39243926
uint32_t offset,
39253927
uint32_t length,
39263928
bufferlist& bl,
3927-
unsigned *pn)
3929+
unsigned *pn,
3930+
bool complain_extent_overlap,
3931+
bool complain_shard_spanning)
39283932
{
39293933
Extent dummy(offset);
39303934
auto start = extent_map.lower_bound(dummy);
@@ -3937,10 +3941,20 @@ bool BlueStore::ExtentMap::encode_some(
39373941
unsigned n = 0;
39383942
size_t bound = 0;
39393943
bool must_reshard = false;
3944+
uint32_t prev_offset_end = 0;
39403945
for (auto p = start;
39413946
p != extent_map.end() && p->logical_offset < end;
39423947
++p, ++n) {
39433948
ceph_assert(p->logical_offset >= offset);
3949+
if (complain_extent_overlap) {
3950+
if (p->logical_offset < prev_offset_end) {
3951+
using P = BlueStore::printer;
3952+
dout(-1) << __func__ << " extents overlap: " << std::endl
3953+
<< onode->print(P::NICK + P::SDISK + P::SUSE + P::SBUF) << dendl;
3954+
ceph_abort();
3955+
}
3956+
prev_offset_end = p->logical_end();
3957+
}
39443958
p->blob->last_encoded_id = -1;
39453959
if (!p->blob->is_spanning() && p->blob_escapes_range(offset, length)) {
39463960
dout(30) << __func__ << " 0x" << std::hex << offset << "~" << length
@@ -3983,6 +3997,14 @@ bool BlueStore::ExtentMap::encode_some(
39833997
p != extent_map.end() && p->logical_offset < end;
39843998
++p, ++n) {
39853999
unsigned blobid;
4000+
if (complain_shard_spanning) {
4001+
if (p->logical_end() > end) {
4002+
using P = BlueStore::printer;
4003+
dout(-1) << __func__ << " extent spans shard after reshard " << ": " << std::endl
4004+
<< onode->print(P::NICK + P::SDISK + P::SUSE + P::SBUF) << dendl;
4005+
ceph_abort();
4006+
}
4007+
}
39864008
bool include_blob = false;
39874009
if (p->blob->is_spanning()) {
39884010
blobid = p->blob->id << BLOBID_SHIFT_BITS;
@@ -9313,6 +9335,7 @@ int BlueStore::_mount()
93139335
segment_size = 0;
93149336
}
93159337
}
9338+
debug_extent_map_encode_check = cct->_conf.get_val<bool>("bluestore_debug_extent_map_encode_check");
93169339
_kv_only = false;
93179340
if (cct->_conf->bluestore_fsck_on_mount) {
93189341
int rc = fsck(cct->_conf->bluestore_fsck_on_mount_deep);

src/os/bluestore/BlueStore.h

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1044,8 +1044,12 @@ class BlueStore : public ObjectStore,
10441044

10451045
void dump(ceph::Formatter* f) const;
10461046

1047-
bool encode_some(uint32_t offset, uint32_t length, ceph::buffer::list& bl,
1048-
unsigned *pn);
1047+
bool encode_some(
1048+
uint32_t offset, uint32_t length, ceph::buffer::list& bl, unsigned *pn,
1049+
bool complain_extent_overlap, //verification; in debug mode assert if extents overlap
1050+
bool complain_shard_spanning //verification; in debug mode assert if extent spans shards;
1051+
//must be used only on encode after reshard
1052+
);
10491053

10501054
class ExtentDecoder {
10511055
uint64_t pos = 0;
@@ -1102,7 +1106,10 @@ class BlueStore : public ObjectStore,
11021106
return p->second;
11031107
}
11041108

1105-
void update(KeyValueDB::Transaction t, bool force);
1109+
void update(
1110+
KeyValueDB::Transaction t,
1111+
bool just_after_reshard //true to indicate that update should now respect shard boundaries
1112+
); //as no further resharding will be done
11061113
decltype(BlueStore::Blob::id) allocate_spanning_blob_id();
11071114
void reshard(
11081115
KeyValueDB *db,
@@ -2455,6 +2462,7 @@ class BlueStore : public ObjectStore,
24552462
"not enough bits for min_alloc_size");
24562463
bool elastic_shared_blobs = false; ///< use smart ExtentMap::dup to reduce shared blob count
24572464
bool use_write_v2 = false; ///< use new write path
2465+
bool debug_extent_map_encode_check = false;
24582466

24592467
enum {
24602468
// Please preserve the order since it's DB persistent

0 commit comments

Comments
 (0)