2424#include " common/entity_name.h"
2525#include " common/perf_counters.h"
2626#include " common/Cond.h"
27+ #include " common/ceph_time.h"
2728
2829#include " events/ESubtreeMap.h"
2930#include " events/ESegment.h"
@@ -45,7 +46,8 @@ MDLog::MDLog(MDSRank* m)
4546 mds(m),
4647 replay_thread(this ),
4748 recovery_thread(this ),
48- submit_thread(this )
49+ submit_thread(this ),
50+ log_trim_counter(DecayCounter(g_conf().get_val<double>(" mds_log_trim_decay_rate" )))
4951{
5052 debug_subtrees = g_conf ().get_val <bool >(" mds_debug_subtrees" );
5153 event_large_threshold = g_conf ().get_val <uint64_t >(" mds_log_event_large_threshold" );
@@ -56,6 +58,7 @@ MDLog::MDLog(MDSRank* m)
5658 max_events = g_conf ().get_val <int64_t >(" mds_log_max_events" );
5759 skip_corrupt_events = g_conf ().get_val <bool >(" mds_log_skip_corrupt_events" );
5860 skip_unbounded_events = g_conf ().get_val <bool >(" mds_log_skip_unbounded_events" );
61+ upkeep_thread = std::thread (&MDLog::log_trim_upkeep, this );
5962}
6063
6164MDLog::~MDLog ()
@@ -68,7 +71,6 @@ MDLog::~MDLog()
6871 }
6972}
7073
71-
7274void MDLog::create_logger ()
7375{
7476 PerfCountersBuilder plb (g_ceph_context, " mds_log" , l_mdl_first, l_mdl_last);
@@ -555,6 +557,13 @@ void MDLog::shutdown()
555557 }
556558 }
557559
560+ upkeep_log_trim_shutdown = true ;
561+ cond.notify_one ();
562+
563+ mds->mds_lock .unlock ();
564+ upkeep_thread.join ();
565+ mds->mds_lock .lock ();
566+
558567 // Replay thread can be stuck inside e.g. Journaler::wait_for_readable,
559568 // so we need to shutdown the journaler first.
560569 if (journaler) {
@@ -605,11 +614,23 @@ void MDLog::try_to_commit_open_file_table(uint64_t last_seq)
605614 }
606615}
607616
608- void MDLog::trim (int m)
617+ void MDLog::log_trim_upkeep (void ) { // Periodic log-trim loop; the MDLog constructor starts upkeep_thread with this function as its entry point.
618+ dout (10 ) << dendl;
619+
620+ std::unique_lock mds_lock (mds->mds_lock ); // taken before the loop so that trim() below is called with mds_lock held
621+ while (!upkeep_log_trim_shutdown.load ()) { // loop ends once shutdown() has set this flag; shutdown() also notifies cond and then joins this thread
622+ if (mds->is_active () || mds->is_stopping ()) { // trimming is attempted only in the active or stopping state
623+ trim ();
624+ }
625+
626+ cond.wait_for (mds_lock, g_conf ().get_val <std::chrono::milliseconds>(" mds_log_trim_upkeep_interval" )); // pause until cond is notified or the configured interval elapses. NOTE(review): presumably mds_lock is released while waiting (usual condition-variable semantics) - confirm against cond's declared type
627+ }
628+ dout (10 ) << __func__ << " : finished" << dendl;
629+ }
630+
631+ void MDLog::trim ()
609632{
610633 int max_ev = max_events;
611- if (m >= 0 )
612- max_ev = m;
613634
614635 if (mds->mdcache ->is_readonly ()) {
615636 dout (10 ) << " trim, ignoring read-only FS" << dendl;
@@ -636,10 +657,6 @@ void MDLog::trim(int m)
636657 return ;
637658 }
638659
639- // hack: only trim for a few seconds at a time
640- utime_t stop = ceph_clock_now ();
641- stop += 2.0 ;
642-
643660 int op_prio = CEPH_MSG_PRIO_LOW +
644661 (CEPH_MSG_PRIO_HIGH - CEPH_MSG_PRIO_LOW) *
645662 expiring_segments.size () / max_segments;
@@ -648,32 +665,43 @@ void MDLog::trim(int m)
648665
649666 unsigned new_expiring_segments = 0 ;
650667
651- unsigned max_expiring_segments = 0 ;
652- if (pre_segments_size > 0 ){
653- max_expiring_segments = max_segments/2 ;
668+ if (pre_segments_size > 0 ) {
654669 ceph_assert (segments.size () >= pre_segments_size);
655- max_expiring_segments = std::max<unsigned >(max_expiring_segments,segments.size () - pre_segments_size);
656670 }
657-
671+
658672 map<uint64_t ,LogSegment*>::iterator p = segments.begin ();
673+
674+ auto trim_start = ceph::coarse_mono_clock::now ();
675+ std::optional<ceph::coarse_mono_time> trim_end;
676+
677+ auto log_trim_counter_start = log_trim_counter.get ();
678+ auto log_trim_threshold = g_conf ().get_val <Option::size_t >(" mds_log_trim_threshold" );
679+
659680 while (p != segments.end ()) {
660- if (stop < ceph_clock_now ())
681+ // throttle - break out of trimming if we've hit the threshold
682+ if (log_trim_counter_start + new_expiring_segments >= log_trim_threshold) {
683+ auto time_spent = std::chrono::duration<double >::zero ();
684+ if (trim_end) {
685+ time_spent = std::chrono::duration<double >(*trim_end - trim_start);
686+ }
687+ dout (10 ) << __func__ << " : breaking out of trim loop - trimmed "
688+ << new_expiring_segments << " segment(s) in " << time_spent.count ()
689+ << " s" << dendl;
661690 break ;
691+ }
662692
663693 unsigned num_remaining_segments = (segments.size () - expired_segments.size () - expiring_segments.size ());
664- if ((num_remaining_segments <= max_segments) &&
665- (max_ev < 0 || (num_events - expiring_events - expired_events) <= ( uint64_t )max_ev))
666- break ;
694+ dout ( 10 ) << __func__ << " : new_expiring_segments= " << new_expiring_segments
695+ << " , num_remaining_segments= " << num_remaining_segments
696+ << " , max_segments= " << max_segments << dendl ;
667697
668- // Do not trim too many segments at once for peak workload. If mds keeps creating N segments each tick,
669- // the upper bound of 'num_remaining_segments - max_segments' is '2 * N'
670- if (new_expiring_segments * 2 > num_remaining_segments)
698+ if ((num_remaining_segments <= max_segments) &&
699+ (max_ev < 0 || (num_events - expiring_events - expired_events) <= (uint64_t )max_ev)) {
700+ dout (10 ) << __func__ << " : breaking out of trim loop - segments/events fell below ceiling"
701+ << " max_segments/max_ev" << dendl;
671702 break ;
703+ }
672704
673- if (max_expiring_segments > 0 &&
674- expiring_segments.size () >= max_expiring_segments)
675- break ;
676-
677705 // look at first segment
678706 LogSegment *ls = p->second ;
679707 ceph_assert (ls);
@@ -699,6 +727,8 @@ void MDLog::trim(int m)
699727
700728 uint64_t last_seq = ls->seq ;
701729 try_expire (ls, op_prio);
730+ log_trim_counter.hit ();
731+ trim_end = ceph::coarse_mono_clock::now ();
702732
703733 submit_mutex.lock ();
704734 p = segments.lower_bound (last_seq + 1 );
@@ -784,6 +814,7 @@ int MDLog::trim_all()
784814
785815void MDLog::try_expire (LogSegment *ls, int op_prio)
786816{
817+ ceph_assert (ceph_mutex_is_locked (mds->mds_lock ));
787818 MDSGatherBuilder gather_bld (g_ceph_context);
788819 ls->try_to_expire (mds, gather_bld, op_prio);
789820
@@ -1581,4 +1612,7 @@ void MDLog::handle_conf_change(const std::set<std::string>& changed, const MDSMa
15811612 if (changed.count (" mds_log_skip_unbounded_events" )) {
15821613 skip_unbounded_events = g_conf ().get_val <bool >(" mds_log_skip_unbounded_events" );
15831614 }
1615+ if (changed.count (" mds_log_trim_decay_rate" )){
1616+ log_trim_counter = DecayCounter (g_conf ().get_val <double >(" mds_log_trim_decay_rate" ));
1617+ }
15841618}
0 commit comments