Skip to content

Commit 67b7554

Browse files
authored
Merge pull request ceph#54437 from Matan-B/wip-matanb-crimson-osdmap-trimming
crimson/osd: introduce osdmap trimming Reviewed-by: Samuel Just <[email protected]>
2 parents 6594540 + 9a58b2e commit 67b7554

File tree

11 files changed

+164
-35
lines changed

11 files changed

+164
-35
lines changed

src/crimson/common/shared_lru.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ class SharedLRU {
8383
cache.clear();
8484
}
8585
shared_ptr_t find(const K& key);
86+
K cached_key_lower_bound();
8687
// return the last element that is not greater than key
8788
shared_ptr_t lower_bound(const K& key);
8889
// return the first element that is greater than key
@@ -146,6 +147,15 @@ SharedLRU<K,V>::find(const K& key)
146147
return val;
147148
}
148149

150+
template<class K, class V>
151+
K SharedLRU<K,V>::cached_key_lower_bound()
152+
{
153+
if (weak_refs.empty()) {
154+
return {};
155+
}
156+
return weak_refs.begin()->first;
157+
}
158+
149159
template<class K, class V>
150160
typename SharedLRU<K,V>::shared_ptr_t
151161
SharedLRU<K,V>::lower_bound(const K& key)

src/crimson/mgr/client.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ namespace crimson::mgr
2424
// implement WithStats if you want to report stats to mgr periodically
2525
class WithStats {
2626
public:
27-
virtual seastar::future<MessageURef> get_stats() const = 0;
27+
virtual seastar::future<MessageURef> get_stats() = 0;
2828
virtual ~WithStats() {}
2929
};
3030

src/crimson/osd/osd.cc

Lines changed: 57 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,11 @@ seastar::future<> OSD::start()
400400
);
401401
}).then([this](OSDSuperblock&& sb) {
402402
superblock = std::move(sb);
403-
pg_shard_manager.set_superblock(superblock);
403+
if (!superblock.cluster_osdmap_trim_lower_bound) {
404+
superblock.cluster_osdmap_trim_lower_bound = superblock.get_oldest_map();
405+
}
406+
return pg_shard_manager.set_superblock(superblock);
407+
}).then([this] {
404408
return pg_shard_manager.get_local_map(superblock.current_epoch);
405409
}).then([this](OSDMapService::local_cached_map_t&& map) {
406410
osdmap = make_local_shared_foreign(OSDMapService::local_cached_map_t(map));
@@ -864,6 +868,25 @@ void OSD::handle_authentication(const EntityName& name,
864868
}
865869
}
866870

871+
const char** OSD::get_tracked_conf_keys() const
872+
{
873+
static const char* KEYS[] = {
874+
"osd_beacon_report_interval",
875+
nullptr
876+
};
877+
return KEYS;
878+
}
879+
880+
void OSD::handle_conf_change(
881+
const crimson::common::ConfigProxy& conf,
882+
const std::set <std::string> &changed)
883+
{
884+
if (changed.count("osd_beacon_report_interval")) {
885+
beacon_timer.rearm_periodic(
886+
std::chrono::seconds(conf->osd_beacon_report_interval));
887+
}
888+
}
889+
867890
void OSD::update_stats()
868891
{
869892
osd_stat_seq++;
@@ -879,13 +902,20 @@ void OSD::update_stats()
879902
});
880903
}
881904

882-
seastar::future<MessageURef> OSD::get_stats() const
905+
seastar::future<MessageURef> OSD::get_stats()
883906
{
884907
// MPGStats::had_map_for is not used since PGMonitor was removed
885908
auto m = crimson::make_message<MPGStats>(monc->get_fsid(), osdmap->get_epoch());
886909
m->osd_stat = osd_stat;
887910
return pg_shard_manager.get_pg_stats(
888-
).then([m=std::move(m)](auto &&stats) mutable {
911+
).then([this, m=std::move(m)](auto &&stats) mutable {
912+
min_last_epoch_clean = osdmap->get_epoch();
913+
min_last_epoch_clean_pgs.clear();
914+
for (auto [pgid, stat] : stats) {
915+
min_last_epoch_clean = std::min(min_last_epoch_clean,
916+
stat.get_effective_last_epoch_clean());
917+
min_last_epoch_clean_pgs.push_back(pgid);
918+
}
889919
m->pg_stat = std::move(stats);
890920
return seastar::make_ready_future<MessageURef>(std::move(m));
891921
});
@@ -934,6 +964,16 @@ seastar::future<> OSD::_handle_osd_map(Ref<MOSDMap> m)
934964
logger().info("handle_osd_map epochs [{}..{}], i have {}, src has [{}..{}]",
935965
first, last, superblock.get_newest_map(),
936966
m->cluster_osdmap_trim_lower_bound, m->newest_map);
967+
968+
if (superblock.cluster_osdmap_trim_lower_bound <
969+
m->cluster_osdmap_trim_lower_bound) {
970+
superblock.cluster_osdmap_trim_lower_bound =
971+
m->cluster_osdmap_trim_lower_bound;
972+
logger().debug("{} superblock cluster_osdmap_trim_lower_bound new epoch is: {}",
973+
__func__, superblock.cluster_osdmap_trim_lower_bound);
974+
ceph_assert(
975+
superblock.cluster_osdmap_trim_lower_bound >= superblock.get_oldest_map());
976+
}
937977
// make sure there is something new, here, before we bother flushing
938978
// the queues and such
939979
if (last <= superblock.get_newest_map()) {
@@ -964,8 +1004,9 @@ seastar::future<> OSD::_handle_osd_map(Ref<MOSDMap> m)
9641004
monc->sub_got("osdmap", last);
9651005

9661006
if (!superblock.maps.empty()) {
967-
// TODO: support osdmap trimming
968-
// See: <tracker>
1007+
pg_shard_manager.trim_maps(t, superblock);
1008+
// TODO: once we support pg splitting, update pg_num_history here
1009+
//pg_num_history.prune(superblock.get_oldest_map());
9691010
}
9701011

9711012
superblock.insert_osdmap_epochs(first, last);
@@ -977,11 +1018,13 @@ seastar::future<> OSD::_handle_osd_map(Ref<MOSDMap> m)
9771018
superblock.clean_thru = last;
9781019
}
9791020
pg_shard_manager.get_meta_coll().store_superblock(t, superblock);
980-
pg_shard_manager.set_superblock(superblock);
981-
logger().debug("OSD::handle_osd_map: do_transaction...");
982-
return store.get_sharded_store().do_transaction(
983-
pg_shard_manager.get_meta_coll().collection(),
984-
std::move(t));
1021+
return pg_shard_manager.set_superblock(superblock).then(
1022+
[this, &t] {
1023+
logger().debug("OSD::handle_osd_map: do_transaction...");
1024+
return store.get_sharded_store().do_transaction(
1025+
pg_shard_manager.get_meta_coll().collection(),
1026+
std::move(t));
1027+
});
9851028
});
9861029
}).then([=, this] {
9871030
// TODO: write to superblock and commit the transaction
@@ -1266,14 +1309,13 @@ seastar::future<> OSD::send_beacon()
12661309
if (!pg_shard_manager.is_active()) {
12671310
return seastar::now();
12681311
}
1269-
// FIXME: min lec should be calculated from pg_stat
1270-
// and should set m->pgs
1271-
epoch_t min_last_epoch_clean = osdmap->get_epoch();
1272-
auto m = crimson::make_message<MOSDBeacon>(osdmap->get_epoch(),
1312+
auto beacon = crimson::make_message<MOSDBeacon>(osdmap->get_epoch(),
12731313
min_last_epoch_clean,
12741314
superblock.last_purged_snaps_scrub,
12751315
local_conf()->osd_beacon_report_interval);
1276-
return monc->send_message(std::move(m));
1316+
beacon->pgs = min_last_epoch_clean_pgs;
1317+
logger().debug("{} {}", __func__, *beacon);
1318+
return monc->send_message(std::move(beacon));
12771319
}
12781320

12791321
seastar::future<> OSD::update_heartbeat_peers()

src/crimson/osd/osd.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,8 @@ class PG;
6161

6262
class OSD final : public crimson::net::Dispatcher,
6363
private crimson::common::AuthHandler,
64-
private crimson::mgr::WithStats {
64+
private crimson::mgr::WithStats,
65+
public md_config_obs_t {
6566
const int whoami;
6667
const uint32_t nonce;
6768
seastar::abort_source& abort_source;
@@ -106,8 +107,11 @@ class OSD final : public crimson::net::Dispatcher,
106107
// pg statistics including osd ones
107108
osd_stat_t osd_stat;
108109
uint32_t osd_stat_seq = 0;
110+
epoch_t min_last_epoch_clean = 0;
111+
// which pgs were scanned for min_lec
112+
std::vector<pg_t> min_last_epoch_clean_pgs;
109113
void update_stats();
110-
seastar::future<MessageURef> get_stats() const final;
114+
seastar::future<MessageURef> get_stats() final;
111115

112116
// AuthHandler methods
113117
void handle_authentication(const EntityName& name,
@@ -123,6 +127,10 @@ class OSD final : public crimson::net::Dispatcher,
123127
std::unique_ptr<Heartbeat> heartbeat;
124128
seastar::timer<seastar::lowres_clock> tick_timer;
125129

130+
const char** get_tracked_conf_keys() const final;
131+
void handle_conf_change(const ConfigProxy& conf,
132+
const std::set<std::string> &changed) final;
133+
126134
// admin-socket
127135
seastar::lw_shared_ptr<crimson::admin::AdminSocket> asok;
128136

src/crimson/osd/osd_meta.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ void OSDMeta::store_map(ceph::os::Transaction& t,
2424
t.write(coll->get_cid(), osdmap_oid(e), 0, m.length(), m);
2525
}
2626

27+
/// Queue the removal of the stored osdmap object for epoch `e`.
///
/// @param t  transaction the remove operation is appended to
/// @param e  epoch whose osdmap object (named via osdmap_oid) is removed
///
/// Nothing is executed here; the operation is only added to `t`.
void OSDMeta::remove_map(ceph::os::Transaction& t, epoch_t e)
{
  const auto map_oid = osdmap_oid(e);
  t.remove(coll->get_cid(), map_oid);
}
31+
2732
seastar::future<bufferlist> OSDMeta::load_map(epoch_t e)
2833
{
2934
return store.read(coll,

src/crimson/osd/osd_meta.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ class OSDMeta {
4040

4141
void store_map(ceph::os::Transaction& t,
4242
epoch_t e, const bufferlist& m);
43+
void remove_map(ceph::os::Transaction& t, epoch_t e);
4344
seastar::future<bufferlist> load_map(epoch_t e);
4445

4546
void store_superblock(ceph::os::Transaction& t,

src/crimson/osd/pg.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -348,8 +348,7 @@ class PG : public boost::intrusive_ref_counter<
348348
void on_active_advmap(const OSDMapRef &osdmap) final;
349349

350350
epoch_t cluster_osdmap_trim_lower_bound() final {
351-
// TODO
352-
return 0;
351+
return shard_services.get_osdmap_tlb();
353352
}
354353

355354
void on_backfill_reserved() final {

src/crimson/osd/pg_shard_manager.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,4 +105,13 @@ seastar::future<> PGShardManager::set_up_epoch(epoch_t e) {
105105
});
106106
}
107107

108+
/// Publish a new superblock to the OSD singleton state and to every shard.
///
/// Must be invoked on PRIMARY_CORE (asserted). The singleton state receives
/// the superblock first; afterwards each shard's local_state is updated
/// through shard_services.invoke_on_all().
///
/// @param superblock  superblock to distribute (taken by value)
/// @return the future returned by invoke_on_all()
seastar::future<> PGShardManager::set_superblock(OSDSuperblock superblock) {
  ceph_assert(seastar::this_shard_id() == PRIMARY_CORE);

  // The singleton must be given the superblock before it is moved into
  // the per-shard updater below.
  get_osd_singleton_state().set_singleton_superblock(superblock);

  auto update_shard = [sb = std::move(superblock)](auto &shard) {
    return shard.local_state.update_shard_superblock(sb);
  };
  return shard_services.invoke_on_all(std::move(update_shard));
}
116+
108117
}

src/crimson/osd/pg_shard_manager.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,16 +129,17 @@ class PGShardManager {
129129
FORWARD_TO_OSD_SINGLETON(init_meta_coll)
130130
FORWARD_TO_OSD_SINGLETON(get_meta_coll)
131131

132-
FORWARD_TO_OSD_SINGLETON(set_superblock)
133-
134132
// Core OSDMap methods
135133
FORWARD_TO_OSD_SINGLETON(get_local_map)
136134
FORWARD_TO_OSD_SINGLETON(load_map_bl)
137135
FORWARD_TO_OSD_SINGLETON(load_map_bls)
138136
FORWARD_TO_OSD_SINGLETON(store_maps)
137+
FORWARD_TO_OSD_SINGLETON(trim_maps)
139138

140139
seastar::future<> set_up_epoch(epoch_t e);
141140

141+
seastar::future<> set_superblock(OSDSuperblock superblock);
142+
142143
template <typename F>
143144
auto with_remote_shard_state(core_id_t core, F &&f) {
144145
return shard_services.invoke_on(

src/crimson/osd/shard_services.cc

Lines changed: 54 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ seastar::future<> PerShardState::stop_pgs()
7272
});
7373
}
7474

75-
std::map<pg_t, pg_stat_t> PerShardState::get_pg_stats() const
75+
std::map<pg_t, pg_stat_t> PerShardState::get_pg_stats()
7676
{
7777
assert_core();
7878
std::map<pg_t, pg_stat_t> ret;
@@ -119,6 +119,13 @@ HeartbeatStampsRef PerShardState::get_hb_stamps(int peer)
119119
return stamps->second;
120120
}
121121

122+
/// Replace this shard's copy of the superblock.
///
/// @param superblock  new superblock; moved into per_shard_superblock
/// @return an already-resolved future
seastar::future<> PerShardState::update_shard_superblock(OSDSuperblock superblock)
{
  assert_core();
  per_shard_superblock = std::move(superblock);
  return seastar::make_ready_future<>();
}
128+
122129
OSDSingletonState::OSDSingletonState(
123130
int whoami,
124131
crimson::net::Messenger &cluster_msgr,
@@ -352,7 +359,6 @@ void OSDSingletonState::handle_conf_change(
352359
seastar::future<OSDSingletonState::local_cached_map_t>
353360
OSDSingletonState::get_local_map(epoch_t e)
354361
{
355-
// TODO: use LRU cache for managing osdmap, fallback to disk if we have to
356362
if (auto found = osdmaps.find(e); found) {
357363
logger().debug("{} osdmap.{} found in cache", __func__, e);
358364
return seastar::make_ready_future<local_cached_map_t>(std::move(found));
@@ -392,6 +398,9 @@ seastar::future<std::map<epoch_t, bufferlist>> OSDSingletonState::load_map_bls(
392398
logger().debug("{} loading maps [{},{}]",
393399
__func__, first, last);
394400
ceph_assert(first <= last);
401+
// TODO: take osd_map_max into account
402+
//int max = cct->_conf->osd_map_message_max;
403+
//ssize_t max_bytes = cct->_conf->osd_map_message_max_bytes;
395404
return seastar::map_reduce(boost::make_counting_iterator<epoch_t>(first),
396405
boost::make_counting_iterator<epoch_t>(last + 1),
397406
[this](epoch_t e) {
@@ -458,6 +467,34 @@ seastar::future<> OSDSingletonState::store_maps(ceph::os::Transaction& t,
458467
});
459468
}
460469

470+
// Note: store/set_superblock is called in later OSD::handle_osd_map
471+
// so we use the OSD's superblock reference meanwhile.
472+
void OSDSingletonState::trim_maps(ceph::os::Transaction& t,
473+
OSDSuperblock& superblock)
474+
{
475+
epoch_t min =
476+
std::min(superblock.cluster_osdmap_trim_lower_bound,
477+
osdmaps.cached_key_lower_bound());
478+
479+
if (min <= superblock.get_oldest_map()) {
480+
return;
481+
}
482+
logger().debug("{}: min={} oldest_map={}", __func__, min, superblock.get_oldest_map());
483+
484+
// Trim from the superblock's oldest_map up to `min`.
485+
// Break if we have exceeded the txn target size.
486+
while (superblock.get_oldest_map() < min &&
487+
t.get_num_ops() < crimson::common::local_conf()->osd_target_transaction_size) {
488+
logger().debug("{}: removing old osdmap epoch {}", __func__, superblock.get_oldest_map());
489+
meta_coll->remove_map(t, superblock.get_oldest_map());
490+
superblock.maps.erase(superblock.get_oldest_map());
491+
}
492+
493+
// we should not trim past osdmaps.cached_key_lower_bound()
494+
// as there may still be PGs with those map epochs recorded.
495+
ceph_assert(min <= osdmaps.cached_key_lower_bound());
496+
}
497+
461498
seastar::future<Ref<PG>> ShardServices::make_pg(
462499
OSDMapService::cached_map_t create_map,
463500
spg_t pgid,
@@ -716,30 +753,36 @@ seastar::future<> OSDSingletonState::send_incremental_map(
716753
"superblock's oldest map: {}",
717754
__func__, first, superblock.get_oldest_map());
718755
if (first >= superblock.get_oldest_map()) {
756+
// TODO: osd_map_share_max_epochs
757+
// See OSDService::build_incremental_map_msg
758+
if (first < superblock.cluster_osdmap_trim_lower_bound) {
759+
logger().info("{}: cluster osdmap lower bound: {} "
760+
" > first {}, starting with full map",
761+
__func__, superblock.cluster_osdmap_trim_lower_bound, first);
762+
// we don't have the next map the target wants,
763+
// so start with a full map.
764+
first = superblock.cluster_osdmap_trim_lower_bound;
765+
}
719766
return load_map_bls(
720767
first, superblock.get_newest_map()
721-
).then([this, &conn, first](auto&& bls) {
768+
).then([this, &conn](auto&& bls) {
722769
auto m = crimson::make_message<MOSDMap>(
723770
monc.get_fsid(),
724771
osdmap->get_encoding_features());
725-
m->cluster_osdmap_trim_lower_bound = first;
772+
m->cluster_osdmap_trim_lower_bound = superblock.cluster_osdmap_trim_lower_bound;
726773
m->newest_map = superblock.get_newest_map();
727774
m->maps = std::move(bls);
728775
return conn.send(std::move(m));
729776
});
730777
} else {
778+
// See OSDService::send_incremental_map
779+
// just send latest full map
731780
return load_map_bl(osdmap->get_epoch()
732781
).then([this, &conn](auto&& bl) mutable {
733782
auto m = crimson::make_message<MOSDMap>(
734783
monc.get_fsid(),
735784
osdmap->get_encoding_features());
736-
/* TODO: once we support the tracking of superblock's
737-
* cluster_osdmap_trim_lower_bound, the MOSDMap should
738-
* be populated with this value instead of the oldest_map.
739-
* See: OSD::handle_osd_map for how classic updates the
740-
* cluster's trim lower bound.
741-
*/
742-
m->cluster_osdmap_trim_lower_bound = superblock.get_oldest_map();
785+
m->cluster_osdmap_trim_lower_bound = superblock.cluster_osdmap_trim_lower_bound;
743786
m->newest_map = superblock.get_newest_map();
744787
m->maps.emplace(osdmap->get_epoch(), std::move(bl));
745788
return conn.send(std::move(m));

0 commit comments

Comments
 (0)