5 files changed (+38, -13 lines)

@@ -1712,14 +1712,32 @@ void OSDService::queue_recovery_context(
       e));
 }
 
-void OSDService::queue_for_snap_trim(PG *pg)
+void OSDService::queue_for_snap_trim(PG *pg, uint64_t cost_per_object)
 {
   dout(10) << "queueing " << *pg << " for snaptrim" << dendl;
+  uint64_t cost_for_queue = [this, cost_per_object] {
+    if (cct->_conf->osd_op_queue == "mclock_scheduler") {
+      /* The cost calculation is valid for most snap trim iterations except
+       * for the following cases:
+       * 1) The penultimate iteration which may return 1 object to trim, in
+       *    which case the cost will be off by a factor equivalent to the
+       *    average object size, and,
+       * 2) The final iteration which returns -ENOENT and performs clean-ups.
+       */
+      return cost_per_object * cct->_conf->osd_pg_max_concurrent_snap_trims;
+    } else {
+      /* We retain this legacy behavior for WeightedPriorityQueue.
+       * This branch should be removed after Squid.
+       */
+      return cct->_conf->osd_snap_trim_cost;
+    }
+  }();
+
   enqueue_back(
     OpSchedulerItem(
       unique_ptr<OpSchedulerItem::OpQueueable>(
         new PGSnapTrim(pg->get_pgid(), pg->get_osdmap_epoch())),
-      cct->_conf->osd_snap_trim_cost,
+      cost_for_queue,
       cct->_conf->osd_snap_trim_priority,
      ceph_clock_now(),
      0,
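
With the mclock scheduler, the cost attached to the snaptrim item now scales with the PG's average object size multiplied by the number of objects trimmed per iteration (osd_pg_max_concurrent_snap_trims), while the legacy WeightedPriorityQueue path keeps the fixed osd_snap_trim_cost. Below is a minimal standalone sketch of that selection; the Conf struct and the example values are illustrative stand-ins, not the actual Ceph option definitions or their defaults.

```cpp
#include <cstdint>
#include <string>

// Illustrative stand-in for the three config options referenced in the diff;
// the values here are examples, not necessarily the cluster defaults.
struct Conf {
  std::string osd_op_queue = "mclock_scheduler";
  uint64_t osd_pg_max_concurrent_snap_trims = 2;  // objects trimmed per iteration
  uint64_t osd_snap_trim_cost = 1 << 20;          // fixed legacy cost in bytes
};

// Mirrors the lambda above: scale by the average object size for mclock,
// fall back to the fixed cost for WeightedPriorityQueue.
uint64_t snap_trim_queue_cost(const Conf& conf, uint64_t cost_per_object) {
  if (conf.osd_op_queue == "mclock_scheduler") {
    return cost_per_object * conf.osd_pg_max_concurrent_snap_trims;
  }
  return conf.osd_snap_trim_cost;
}

int main() {
  // A PG whose average object size is 4 MiB is queued with an 8 MiB cost.
  return snap_trim_queue_cost(Conf{}, 4ull << 20) == (8ull << 20) ? 0 : 1;
}
```

The intent, per the comment in the change, is that the enqueued cost approximates the bytes actually removed by one trim iteration, apart from the penultimate and final iterations noted above.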

@@ -498,7 +498,7 @@ class OSDService : public Scrub::ScrubSchedListener {
     GenContext<ThreadPool::TPHandle&> *c,
     uint64_t cost,
     int priority);
-  void queue_for_snap_trim(PG *pg);
+  void queue_for_snap_trim(PG *pg, uint64_t cost);
   void queue_for_scrub(PG* pg, Scrub::scrub_prio_t with_priority);
 
   void queue_scrub_after_repair(PG* pg, Scrub::scrub_prio_t with_priority);

@@ -420,15 +420,7 @@ void PG::queue_recovery()
     dout(10) << "queue_recovery -- queuing" << dendl;
     recovery_queued = true;
     // Let cost per object be the average object size
-    auto num_bytes = static_cast<uint64_t>(
-      std::max<int64_t>(
-        0, // ensure bytes is non-negative
-        info.stats.stats.sum.num_bytes));
-    auto num_objects = static_cast<uint64_t>(
-      std::max<int64_t>(
-        1, // ensure objects is non-negative and non-zero
-        info.stats.stats.sum.num_objects));
-    uint64_t cost_per_object = std::max<uint64_t>(num_bytes / num_objects, 1);
+    uint64_t cost_per_object = get_average_object_size();
     osd->queue_for_recovery(
       this, cost_per_object, recovery_state.get_recovery_op_priority()
     );

@@ -1030,6 +1030,19 @@ class PG : public DoutPrefixProvider,
     return num_bytes;
   }
 
+  uint64_t get_average_object_size() {
+    ceph_assert(ceph_mutex_is_locked_by_me(_lock));
+    auto num_bytes = static_cast<uint64_t>(
+      std::max<int64_t>(
+        0, // ensure bytes is non-negative
+        info.stats.stats.sum.num_bytes));
+    auto num_objects = static_cast<uint64_t>(
+      std::max<int64_t>(
+        1, // ensure objects is non-negative and non-zero
+        info.stats.stats.sum.num_objects));
+    return std::max<uint64_t>(num_bytes / num_objects, 1);
+  }
+
 protected:
 
   /*
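
The new helper clamps the pg stat sums before dividing: a negative byte count is treated as 0, the object count is held at a minimum of 1 to avoid division by zero, and the result is held at a minimum of 1 so an empty PG still yields a non-zero cost. A small self-contained sketch of that arithmetic, with plain int64_t parameters standing in for the pg stat sums:

```cpp
#include <algorithm>
#include <cstdint>

// Same clamping as get_average_object_size() above, taken out of the PG
// class for illustration; the int64_t inputs mirror the pg stat sums.
uint64_t average_object_size(int64_t sum_num_bytes, int64_t sum_num_objects) {
  auto num_bytes = static_cast<uint64_t>(
    std::max<int64_t>(0, sum_num_bytes));     // bytes can never go negative
  auto num_objects = static_cast<uint64_t>(
    std::max<int64_t>(1, sum_num_objects));   // avoid dividing by zero
  return std::max<uint64_t>(num_bytes / num_objects, 1);  // never a zero cost
}

int main() {
  // An empty or freshly created PG still reports a cost of 1 rather than 0.
  return average_object_size(0, 0) == 1 ? 0 : 1;
}
```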

@@ -15605,8 +15605,10 @@ PrimaryLogPG::AwaitAsyncWork::AwaitAsyncWork(my_context ctx)
     NamedState(nullptr, "Trimming/AwaitAsyncWork")
 {
   auto *pg = context< SnapTrimmer >().pg;
+  // Determine cost in terms of the average object size
+  uint64_t cost_per_object = pg->get_average_object_size();
   context< SnapTrimmer >().log_enter(state_name);
-  context< SnapTrimmer >().pg->osd->queue_for_snap_trim(pg);
+  context< SnapTrimmer >().pg->osd->queue_for_snap_trim(pg, cost_per_object);
   pg->state_set(PG_STATE_SNAPTRIM);
   pg->state_clear(PG_STATE_SNAPTRIM_ERROR);
   pg->publish_stats_to_osd();