Skip to content

Commit 718afe2

Browse files
committed
mds: start a new major segment after reaching minor segment threshold
Credit goes to Patrick (@batrick) for identifying this. When there is a huge number of subtree exports (such as those done in the export thrashing test), the MDS would log an EExport event. The EExport event is relatively large in size. This causes the MDS to log new minor log segments frequently. Moreover, the MDS logs a major segment (boundary) after a certain number of events have been logged. This causes a large number of (minor) events to build up and delays the trimming of expired segments, since the journal expire position is updated on segment boundaries. To mitigate this issue, the MDS now starts a major segment after a configured number of minor segments have been logged. This threshold is configurable by adjusting the `mds_log_minor_segments_per_major_segment` MDS config option (defaults to 16). Fixes: https://tracker.ceph.com/issues/66948 Signed-off-by: Venky Shankar <[email protected]>
1 parent 7d11c70 commit 718afe2

File tree

3 files changed

+22
-21
lines changed

3 files changed

+22
-21
lines changed

src/common/options/mds.yaml.in

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -586,16 +586,6 @@ options:
586586
min: 1
587587
services:
588588
- mds
589-
- name: mds_log_major_segment_event_ratio
590-
type: uint
591-
level: advanced
592-
desc: multiple of mds_log_events_per_segment between major segments
593-
default: 12
594-
services:
595-
- mds
596-
min: 1
597-
see_also:
598-
- mds_log_events_per_segment
599589
# segment size for mds log, default to default file_layout_t
600590
- name: mds_log_segment_size
601591
type: size
@@ -1741,3 +1731,12 @@ options:
17411731
- mds
17421732
flags:
17431733
- runtime
1734+
- name: mds_log_minor_segments_per_major_segment
1735+
type: uint
1736+
level: advanced
1737+
desc: number of minor segments per major segment.
1738+
long_desc: The number of minor mds log segments since last major segment after which a major segment is started/logged.
1739+
default: 16
1740+
services:
1741+
- mds
1742+
min: 8

src/mds/MDLog.cc

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,12 @@ MDLog::MDLog(MDSRank* m)
5353
event_large_threshold = g_conf().get_val<uint64_t>("mds_log_event_large_threshold");
5454
events_per_segment = g_conf().get_val<uint64_t>("mds_log_events_per_segment");
5555
pause = g_conf().get_val<bool>("mds_log_pause");
56-
major_segment_event_ratio = g_conf().get_val<uint64_t>("mds_log_major_segment_event_ratio");
5756
max_segments = g_conf().get_val<uint64_t>("mds_log_max_segments");
5857
max_events = g_conf().get_val<int64_t>("mds_log_max_events");
5958
skip_corrupt_events = g_conf().get_val<bool>("mds_log_skip_corrupt_events");
6059
skip_unbounded_events = g_conf().get_val<bool>("mds_log_skip_unbounded_events");
6160
log_warn_factor = g_conf().get_val<double>("mds_log_warn_factor");
61+
minor_segments_per_major_segment = g_conf().get_val<uint64_t>("mds_log_minor_segments_per_major_segment");
6262
upkeep_thread = std::thread(&MDLog::log_trim_upkeep, this);
6363
}
6464

@@ -358,14 +358,15 @@ void MDLog::_submit_entry(LogEvent *le, MDSLogContextBase* c)
358358
ceph_assert(!mds_is_shutting_down);
359359

360360
event_seq++;
361-
events_since_last_major_segment++;
362361

363362
if (auto sb = dynamic_cast<SegmentBoundary*>(le); sb) {
364363
auto ls = _start_new_segment(sb);
365364
if (sb->is_major_segment_boundary()) {
366365
major_segments.insert(ls->seq);
367366
logger->set(l_mdl_segmjr, major_segments.size());
368-
events_since_last_major_segment = 0;
367+
minor_segments_since_last_major_segment = 0;
368+
} else {
369+
++minor_segments_since_last_major_segment;
369370
}
370371
}
371372

@@ -404,7 +405,7 @@ void MDLog::_segment_upkeep()
404405
uint64_t period = journaler->get_layout_period();
405406
auto ls = get_current_segment();
406407
// start a new segment?
407-
if (events_since_last_major_segment > events_per_segment*major_segment_event_ratio) {
408+
if (minor_segments_since_last_major_segment > minor_segments_per_major_segment) {
408409
dout(10) << __func__ << ": starting new major segment, current " << *ls << dendl;
409410
auto sle = mds->mdcache->create_subtree_map();
410411
_submit_entry(sle, NULL);
@@ -1479,7 +1480,6 @@ void MDLog::_replay_thread()
14791480
}
14801481
le->set_start_off(pos);
14811482

1482-
events_since_last_major_segment++;
14831483
if (auto sb = dynamic_cast<SegmentBoundary*>(le.get()); sb) {
14841484
auto seq = sb->get_seq();
14851485
if (seq > 0) {
@@ -1492,7 +1492,9 @@ void MDLog::_replay_thread()
14921492
if (sb->is_major_segment_boundary()) {
14931493
major_segments.insert(event_seq);
14941494
logger->set(l_mdl_segmjr, major_segments.size());
1495-
events_since_last_major_segment = 0;
1495+
minor_segments_since_last_major_segment = 0;
1496+
} else {
1497+
++minor_segments_since_last_major_segment;
14961498
}
14971499
} else {
14981500
event_seq++;
@@ -1623,9 +1625,6 @@ void MDLog::handle_conf_change(const std::set<std::string>& changed, const MDSMa
16231625
if (changed.count("mds_log_events_per_segment")) {
16241626
events_per_segment = g_conf().get_val<uint64_t>("mds_log_events_per_segment");
16251627
}
1626-
if (changed.count("mds_log_major_segment_event_ratio")) {
1627-
major_segment_event_ratio = g_conf().get_val<uint64_t>("mds_log_major_segment_event_ratio");
1628-
}
16291628
if (changed.count("mds_log_max_events")) {
16301629
max_events = g_conf().get_val<int64_t>("mds_log_max_events");
16311630
}
@@ -1650,4 +1649,7 @@ void MDLog::handle_conf_change(const std::set<std::string>& changed, const MDSMa
16501649
if (changed.count("mds_log_warn_factor")) {
16511650
log_warn_factor = g_conf().get_val<double>("mds_log_warn_factor");
16521651
}
1652+
if (changed.count("mds_log_minor_segments_per_major_segment")) {
1653+
minor_segments_per_major_segment = g_conf().get_val<uint64_t>("mds_log_minor_segments_per_major_segment");
1654+
}
16531655
}

src/mds/MDLog.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -305,17 +305,17 @@ class MDLog {
305305
bool debug_subtrees;
306306
std::atomic_uint64_t event_large_threshold; // accessed by submit thread
307307
uint64_t events_per_segment;
308-
uint64_t major_segment_event_ratio;
309308
int64_t max_events;
310309
uint64_t max_segments;
310+
uint64_t minor_segments_per_major_segment;
311311
bool pause;
312312
bool skip_corrupt_events;
313313
bool skip_unbounded_events;
314314

315315
std::set<uint64_t> major_segments;
316316
std::set<LogSegment*> expired_segments;
317317
std::set<LogSegment*> expiring_segments;
318-
uint64_t events_since_last_major_segment = 0;
318+
uint64_t minor_segments_since_last_major_segment = 0;
319319
double log_warn_factor;
320320

321321
// log trimming decay counter

0 commit comments

Comments
 (0)