Skip to content

Commit 791614e

Browse files
committed
osd/OSD: fix track_pools_and_pg_num_changes on mapgaps
* track_pools_and_pg_num_changes call is moved before the superblock updates to know if we have any OSDMaps *before* trimming them (if possible). This is used to identify map gaps. * track_pools_and_pg_num_changes inner loop is moved into _track_pools_and_pg_num_changes. * lastmap now starts with the newest_map we have, even on mapgap. Previously, on mapgaps, we would (falsly) assume that this is the first start of this OSD and would ignore the state before the gap. This case is fully described in the tracker below: Fixes: https://tracker.ceph.com/issues/63881 Signed-off-by: Matan Breizman <[email protected]>
1 parent 5f55235 commit 791614e

File tree

2 files changed

+95
-56
lines changed

2 files changed

+95
-56
lines changed

src/osd/OSD.cc

Lines changed: 90 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -8245,13 +8245,18 @@ void OSD::handle_osd_map(MOSDMap *m)
82458245
rerequest_full_maps();
82468246
}
82478247

8248+
track_pools_and_pg_num_changes(added_maps, t);
8249+
82488250
if (!superblock.maps.empty()) {
82498251
trim_maps(m->cluster_osdmap_trim_lower_bound);
82508252
pg_num_history.prune(superblock.get_oldest_map());
82518253
}
82528254
superblock.insert_osdmap_epochs(first, last);
82538255
if (superblock.maps.num_intervals() > 1) {
8254-
dout(10) << __func__ << " osd map gap " << superblock.maps << dendl;
8256+
// we had a map gap and not yet trimmed all the way up to
8257+
// cluster_osdmap_trim_lower_bound
8258+
dout(10) << __func__ << " osd maps are not contiguous"
8259+
<< superblock.maps << dendl;
82558260
}
82568261
superblock.current_epoch = last;
82578262

@@ -8262,8 +8267,6 @@ void OSD::handle_osd_map(MOSDMap *m)
82628267
superblock.clean_thru = last;
82638268
}
82648269

8265-
track_pools_and_pg_num_changes(added_maps, t, last);
8266-
82678270
{
82688271
bufferlist bl;
82698272
::encode(pg_num_history, bl);
@@ -8308,65 +8311,98 @@ void OSD::handle_osd_map(MOSDMap *m)
83088311
}
83098312

83108313
/*
8311-
* For each added map which was sent in this MOSDMap [first, last]:
8312-
*
8313-
* 1) Check if a pool was deleted
8314-
* 2) For existing pools, check if pg_num was changed
8315-
* 3) Check if a pool was created
8316-
*
8317-
* Track all of the changes above in pg_num_history
8314+
* Compare between the previous last_map we had to
8315+
* each one of the added_maps.
8316+
* Track all of the changes relevant in pg_num_history.
83188317
*/
8319-
void OSD::track_pools_and_pg_num_changes(const map<epoch_t,OSDMapRef>& added_maps,
8320-
ObjectStore::Transaction& t,
8321-
epoch_t last)
8318+
void OSD::track_pools_and_pg_num_changes(
8319+
const map<epoch_t,OSDMapRef>& added_maps,
8320+
ObjectStore::Transaction& t)
83228321
{
8322+
epoch_t first = added_maps.begin()->first;
8323+
epoch_t last = added_maps.rbegin()->first;
8324+
8325+
// Unless this is the first start of this OSD,
8326+
// lastmap should be the newest_map we have.
83238327
OSDMapRef lastmap;
8324-
for (auto& [added_map_epoch, added_map] : added_maps) {
8325-
if (!lastmap) {
8326-
if (!(lastmap = service.try_get_map(added_map_epoch - 1))) {
8327-
dout(10) << __func__ << " can't get previous map " << added_map_epoch - 1
8328-
<< " probably first start of this osd" << dendl;
8329-
continue;
8330-
}
8328+
8329+
if (superblock.maps.empty()) {
8330+
dout(10) << __func__ << " no maps stored, this is probably "
8331+
<< "the first start of this osd" << dendl;
8332+
lastmap = added_maps.at(first);
8333+
} else {
8334+
if (first > superblock.get_newest_map() + 1) {
8335+
ceph_assert(first == superblock.cluster_osdmap_trim_lower_bound);
8336+
dout(20) << __func__ << " can't get previous map "
8337+
<< superblock.get_newest_map()
8338+
<< " first start of this osd after a map gap" << dendl;
8339+
}
8340+
if (!(lastmap =
8341+
service.try_get_map(superblock.get_newest_map()))) {
8342+
// This is unexpected
8343+
ceph_abort();
83318344
}
8332-
ceph_assert(lastmap->get_epoch() + 1 == added_map->get_epoch());
8333-
for (auto& [pool_id, pg_pool] : lastmap->get_pools()) {
8334-
if (!added_map->have_pg_pool(pool_id)) {
8335-
pg_num_history.log_pool_delete(added_map_epoch, pool_id);
8336-
dout(10) << __func__ << " recording final pg_pool_t for pool "
8337-
<< pool_id << dendl;
8338-
// this information is needed by _make_pg() if have to restart before
8339-
// the pool is deleted and need to instantiate a new (zombie) PG[Pool].
8340-
ghobject_t obj = make_final_pool_info_oid(pool_id);
8341-
bufferlist bl;
8342-
encode(pg_pool, bl, CEPH_FEATURES_ALL);
8343-
string name = lastmap->get_pool_name(pool_id);
8344-
encode(name, bl);
8345-
map<string,string> profile;
8346-
if (lastmap->get_pg_pool(pool_id)->is_erasure()) {
8347-
profile = lastmap->get_erasure_code_profile(
8348-
lastmap->get_pg_pool(pool_id)->erasure_code_profile);
8349-
}
8350-
encode(profile, bl);
8351-
t.write(coll_t::meta(), obj, 0, bl.length(), bl);
8352-
} else if (unsigned new_pg_num = added_map->get_pg_num(pool_id);
8353-
new_pg_num != pg_pool.get_pg_num()) {
8354-
dout(10) << __func__ << " recording pool " << pool_id << " pg_num "
8355-
<< pg_pool.get_pg_num() << " -> " << new_pg_num << dendl;
8356-
pg_num_history.log_pg_num_change(added_map_epoch, pool_id, new_pg_num);
8345+
}
8346+
8347+
// For each added map, record any changes into pg_num_history
8348+
// and update lastmap afterwards.
8349+
for (auto& [current_added_map_epoch, current_added_map] : added_maps) {
8350+
_track_pools_and_pg_num_changes(t, lastmap,
8351+
current_added_map,
8352+
current_added_map_epoch);
8353+
lastmap = current_added_map;
8354+
}
8355+
pg_num_history.epoch = last;
8356+
}
8357+
8358+
void OSD::_track_pools_and_pg_num_changes(
8359+
ObjectStore::Transaction& t,
8360+
const OSDMapRef& lastmap,
8361+
const OSDMapRef& current_added_map,
8362+
epoch_t current_added_map_epoch)
8363+
{
8364+
// 1) Check if a pool was deleted
8365+
for (auto& [pool_id, pg_pool] : lastmap->get_pools()) {
8366+
if (!current_added_map->have_pg_pool(pool_id)) {
8367+
pg_num_history.log_pool_delete(current_added_map_epoch, pool_id);
8368+
dout(10) << __func__ << " recording final pg_pool_t for pool "
8369+
<< pool_id << dendl;
8370+
// this information is needed by _make_pg() if have to restart before
8371+
// the pool is deleted and need to instantiate a new (zombie) PG[Pool].
8372+
ghobject_t obj = make_final_pool_info_oid(pool_id);
8373+
bufferlist bl;
8374+
encode(pg_pool, bl, CEPH_FEATURES_ALL);
8375+
string name = lastmap->get_pool_name(pool_id);
8376+
encode(name, bl);
8377+
map<string,string> profile;
8378+
if (lastmap->get_pg_pool(pool_id)->is_erasure()) {
8379+
profile = lastmap->get_erasure_code_profile(
8380+
lastmap->get_pg_pool(pool_id)->erasure_code_profile);
83578381
}
8382+
encode(profile, bl);
8383+
t.write(coll_t::meta(), obj, 0, bl.length(), bl);
8384+
8385+
// 2) For existing pools, check if pg_num was changed
8386+
} else if (unsigned new_pg_num = current_added_map->get_pg_num(pool_id);
8387+
new_pg_num != pg_pool.get_pg_num()) {
8388+
dout(10) << __func__ << " recording pool " << pool_id << " pg_num "
8389+
<< pg_pool.get_pg_num() << " -> " << new_pg_num << dendl;
8390+
pg_num_history.log_pg_num_change(current_added_map_epoch,
8391+
pool_id,
8392+
new_pg_num);
83588393
}
8359-
for (auto& [pool_id, pg_pool] : added_map->get_pools()) {
8360-
if (!lastmap->have_pg_pool(pool_id)) {
8361-
dout(10) << __func__ << " recording new pool " <<pool_id << " pg_num "
8362-
<< pg_pool.get_pg_num() << dendl;
8363-
pg_num_history.log_pg_num_change(added_map_epoch, pool_id,
8364-
pg_pool.get_pg_num());
8365-
}
8394+
}
8395+
8396+
// 3) Check if a pool was created
8397+
for (auto& [pool_id, pg_pool] : current_added_map->get_pools()) {
8398+
if (!lastmap->have_pg_pool(pool_id)) {
8399+
dout(10) << __func__ << " recording new pool " <<pool_id << " pg_num "
8400+
<< pg_pool.get_pg_num() << dendl;
8401+
pg_num_history.log_pg_num_change(current_added_map_epoch,
8402+
pool_id,
8403+
pg_pool.get_pg_num());
83668404
}
8367-
lastmap = added_map;
83688405
}
8369-
pg_num_history.epoch = last;
83708406
}
83718407

83728408
void OSD::_committed_osd_maps(epoch_t first, epoch_t last, MOSDMap *m)

src/osd/OSD.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1675,8 +1675,11 @@ class OSD : public Dispatcher,
16751675

16761676
void handle_osd_map(class MOSDMap *m);
16771677
void track_pools_and_pg_num_changes(const std::map<epoch_t,OSDMapRef>& added_maps,
1678-
ObjectStore::Transaction& t,
1679-
epoch_t last);
1678+
ObjectStore::Transaction& t);
1679+
void _track_pools_and_pg_num_changes(ObjectStore::Transaction& t,
1680+
const OSDMapRef& lastmap,
1681+
const OSDMapRef& current_added_map,
1682+
epoch_t current_added_map_epoch);
16801683
void _committed_osd_maps(epoch_t first, epoch_t last, class MOSDMap *m);
16811684
void trim_maps(epoch_t oldest);
16821685
void note_down_osd(int osd);

0 commit comments

Comments
 (0)