Skip to content

Commit 9d2b3aa

Browse files
committed
Merge PR ceph#60381 into main
* refs/pull/60381/head: doc: remove references to `mds_log_major_segment_event_ratio` mds: start a new major segment after reaching minor segment threshold mds: make parts of mdlog reusable to be used by beacon Reviewed-by: Anthony D Atri <[email protected]> Reviewed-by: Patrick Donnelly <[email protected]>
2 parents ad153a8 + f54be33 commit 9d2b3aa

File tree

5 files changed

+44
-34
lines changed

5 files changed

+44
-34
lines changed

doc/cephfs/mds-journaling.rst

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -141,14 +141,12 @@ The targeted size of a log segment in terms of number of events is controlled b
141141

142142
.. confval:: mds_log_events_per_segment
143143

144-
The frequency of major segments (noted by the journaling of the latest ``ESubtreeMap``) is controlled by:
144+
The number of minor MDS log segments logged before a new major segment is started is controlled by:
145145

146-
.. confval:: mds_log_major_segment_event_ratio
146+
.. confval:: mds_log_minor_segments_per_major_segment
147147

148-
When ``mds_log_events_per_segment * mds_log_major_segment_event_ratio``
149-
non-``ESubtreeMap`` events are logged, the MDS will journal a new
150-
``ESubtreeMap``. This is necessary to allow the journal to shrink in size
151-
during the trimming of expired segments.
148+
This controls how often the MDS trims expired log segments (the higher the value, the less
149+
often the MDS updates the journal expiry position for trimming).
152150

153151
The target maximum number of segments is controlled by:
154152

src/common/options/mds.yaml.in

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -586,16 +586,6 @@ options:
586586
min: 1
587587
services:
588588
- mds
589-
- name: mds_log_major_segment_event_ratio
590-
type: uint
591-
level: advanced
592-
desc: multiple of mds_log_events_per_segment between major segments
593-
default: 12
594-
services:
595-
- mds
596-
min: 1
597-
see_also:
598-
- mds_log_events_per_segment
599589
# segment size for mds log, default to default file_layout_t
600590
- name: mds_log_segment_size
601591
type: size
@@ -1741,3 +1731,12 @@ options:
17411731
- mds
17421732
flags:
17431733
- runtime
1734+
- name: mds_log_minor_segments_per_major_segment
1735+
type: uint
1736+
level: advanced
1737+
desc: number of minor segments per major segment.
1738+
long_desc: The number of minor mds log segments since last major segment after which a major segment is started/logged.
1739+
default: 16
1740+
services:
1741+
- mds
1742+
min: 8

src/mds/Beacon.cc

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -321,16 +321,15 @@ void Beacon::notify_health(MDSRank const *mds)
321321
// Detect MDS_HEALTH_TRIM condition
322322
// Indicates MDS is not trimming promptly
323323
{
324-
const auto log_max_segments = mds->mdlog->get_max_segments();
325-
const auto log_warn_factor = g_conf().get_val<double>("mds_log_warn_factor");
326-
if (mds->mdlog->get_num_segments() > (size_t)(log_max_segments * log_warn_factor)) {
324+
if (mds->mdlog->is_trim_slow()) {
325+
auto num_segments = mds->mdlog->get_num_segments();
326+
auto max_segments = mds->mdlog->get_max_segments();
327327
CachedStackStringStream css;
328-
*css << "Behind on trimming (" << mds->mdlog->get_num_segments()
329-
<< "/" << log_max_segments << ")";
328+
*css << "Behind on trimming (" << num_segments << "/" << max_segments << ")";
330329

331330
MDSHealthMetric m(MDS_HEALTH_TRIM, HEALTH_WARN, css->strv());
332-
m.metadata["num_segments"] = stringify(mds->mdlog->get_num_segments());
333-
m.metadata["max_segments"] = stringify(log_max_segments);
331+
m.metadata["num_segments"] = stringify(num_segments);
332+
m.metadata["max_segments"] = stringify(max_segments);
334333
health.metrics.push_back(m);
335334
}
336335
}

src/mds/MDLog.cc

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,12 @@ MDLog::MDLog(MDSRank* m)
5353
event_large_threshold = g_conf().get_val<uint64_t>("mds_log_event_large_threshold");
5454
events_per_segment = g_conf().get_val<uint64_t>("mds_log_events_per_segment");
5555
pause = g_conf().get_val<bool>("mds_log_pause");
56-
major_segment_event_ratio = g_conf().get_val<uint64_t>("mds_log_major_segment_event_ratio");
5756
max_segments = g_conf().get_val<uint64_t>("mds_log_max_segments");
5857
max_events = g_conf().get_val<int64_t>("mds_log_max_events");
5958
skip_corrupt_events = g_conf().get_val<bool>("mds_log_skip_corrupt_events");
6059
skip_unbounded_events = g_conf().get_val<bool>("mds_log_skip_unbounded_events");
60+
log_warn_factor = g_conf().get_val<double>("mds_log_warn_factor");
61+
minor_segments_per_major_segment = g_conf().get_val<uint64_t>("mds_log_minor_segments_per_major_segment");
6162
upkeep_thread = std::thread(&MDLog::log_trim_upkeep, this);
6263
}
6364

@@ -357,14 +358,15 @@ void MDLog::_submit_entry(LogEvent *le, MDSLogContextBase* c)
357358
ceph_assert(!mds_is_shutting_down);
358359

359360
event_seq++;
360-
events_since_last_major_segment++;
361361

362362
if (auto sb = dynamic_cast<SegmentBoundary*>(le); sb) {
363363
auto ls = _start_new_segment(sb);
364364
if (sb->is_major_segment_boundary()) {
365365
major_segments.insert(ls->seq);
366366
logger->set(l_mdl_segmjr, major_segments.size());
367-
events_since_last_major_segment = 0;
367+
minor_segments_since_last_major_segment = 0;
368+
} else {
369+
++minor_segments_since_last_major_segment;
368370
}
369371
}
370372

@@ -403,7 +405,7 @@ void MDLog::_segment_upkeep()
403405
uint64_t period = journaler->get_layout_period();
404406
auto ls = get_current_segment();
405407
// start a new segment?
406-
if (events_since_last_major_segment > events_per_segment*major_segment_event_ratio) {
408+
if (minor_segments_since_last_major_segment > minor_segments_per_major_segment) {
407409
dout(10) << __func__ << ": starting new major segment, current " << *ls << dendl;
408410
auto sle = mds->mdcache->create_subtree_map();
409411
_submit_entry(sle, NULL);
@@ -656,6 +658,10 @@ void MDLog::try_to_commit_open_file_table(uint64_t last_seq)
656658
}
657659
}
658660

661+
// Returns true when the number of entries in `segments` exceeds
// `max_segments * log_warn_factor` (the product is truncated to size_t
// before the comparison). Used by Beacon::notify_health to decide whether
// to raise the MDS_HEALTH_TRIM ("Behind on trimming") warning.
bool MDLog::is_trim_slow() const {
662+
return (segments.size() > (size_t)(max_segments * log_warn_factor));
663+
}
664+
659665
void MDLog::log_trim_upkeep(void) {
660666
dout(10) << dendl;
661667

@@ -1474,7 +1480,6 @@ void MDLog::_replay_thread()
14741480
}
14751481
le->set_start_off(pos);
14761482

1477-
events_since_last_major_segment++;
14781483
if (auto sb = dynamic_cast<SegmentBoundary*>(le.get()); sb) {
14791484
auto seq = sb->get_seq();
14801485
if (seq > 0) {
@@ -1487,7 +1492,9 @@ void MDLog::_replay_thread()
14871492
if (sb->is_major_segment_boundary()) {
14881493
major_segments.insert(event_seq);
14891494
logger->set(l_mdl_segmjr, major_segments.size());
1490-
events_since_last_major_segment = 0;
1495+
minor_segments_since_last_major_segment = 0;
1496+
} else {
1497+
++minor_segments_since_last_major_segment;
14911498
}
14921499
} else {
14931500
event_seq++;
@@ -1618,9 +1625,6 @@ void MDLog::handle_conf_change(const std::set<std::string>& changed, const MDSMa
16181625
if (changed.count("mds_log_events_per_segment")) {
16191626
events_per_segment = g_conf().get_val<uint64_t>("mds_log_events_per_segment");
16201627
}
1621-
if (changed.count("mds_log_major_segment_event_ratio")) {
1622-
major_segment_event_ratio = g_conf().get_val<uint64_t>("mds_log_major_segment_event_ratio");
1623-
}
16241628
if (changed.count("mds_log_max_events")) {
16251629
max_events = g_conf().get_val<int64_t>("mds_log_max_events");
16261630
}
@@ -1642,4 +1646,10 @@ void MDLog::handle_conf_change(const std::set<std::string>& changed, const MDSMa
16421646
if (changed.count("mds_log_trim_decay_rate")){
16431647
log_trim_counter = DecayCounter(g_conf().get_val<double>("mds_log_trim_decay_rate"));
16441648
}
1649+
if (changed.count("mds_log_warn_factor")) {
1650+
log_warn_factor = g_conf().get_val<double>("mds_log_warn_factor");
1651+
}
1652+
if (changed.count("mds_log_minor_segments_per_major_segment")) {
1653+
minor_segments_per_major_segment = g_conf().get_val<uint64_t>("mds_log_minor_segments_per_major_segment");
1654+
}
16451655
}

src/mds/MDLog.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,9 @@ class MDLog {
173173
// replay state
174174
std::map<inodeno_t, std::set<inodeno_t>> pending_exports;
175175

176+
// beacon needs me too
177+
bool is_trim_slow() const;
178+
176179
protected:
177180
struct PendingEvent {
178181
PendingEvent(LogEvent *e, Context* c, bool f=false) : le(e), fin(c), flush(f) {}
@@ -302,17 +305,18 @@ class MDLog {
302305
bool debug_subtrees;
303306
std::atomic_uint64_t event_large_threshold; // accessed by submit thread
304307
uint64_t events_per_segment;
305-
uint64_t major_segment_event_ratio;
306308
int64_t max_events;
307309
uint64_t max_segments;
310+
uint64_t minor_segments_per_major_segment;
308311
bool pause;
309312
bool skip_corrupt_events;
310313
bool skip_unbounded_events;
311314

312315
std::set<uint64_t> major_segments;
313316
std::set<LogSegment*> expired_segments;
314317
std::set<LogSegment*> expiring_segments;
315-
uint64_t events_since_last_major_segment = 0;
318+
uint64_t minor_segments_since_last_major_segment = 0;
319+
double log_warn_factor;
316320

317321
// log trimming decay counter
318322
DecayCounter log_trim_counter;

0 commit comments

Comments
 (0)