Skip to content

Commit fbd5c40

Browse files
committed
osd: Update PGSnapTrim op queue item cost to reflect average object size
Previously, a static value of snap_trim_cost (1 MiB by default) was used as the cost of each PGSnapTrim item (see config option osd_snap_trim_cost). For pools with significantly different object sizes, the static cost doesn't accurately estimate the amount of IO each snap trim operation requires. Instead, add a cost_per_object parameter to OSDService::queue_for_snap_trim and set it to the average object size in the PG being queued by using PG::get_average_object_size(). In addition, for the mClock scheduler, the cost_per_object is multiplied by the number of objects trimmed per iteration. This multiplier is represented by the osd_pg_max_concurrent_snap_trims config option, which is used when the actual work starts (see DoSnapWork). Note: The above cost calculation is valid for most snap trim iterations except for: 1. The penultimate iteration, which may return only 1 object to be trimmed, in which case the cost will be off by a factor equivalent to the average object size. 2. The final iteration (returns -ENOENT), which only involves clean-ups. Fixes: https://tracker.ceph.com/issues/63604 Signed-off-by: Sridhar Seshasayee <[email protected]>
1 parent 08b2255 commit fbd5c40

File tree

3 files changed

+24
-4
lines changed

3 files changed

+24
-4
lines changed

src/osd/OSD.cc

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1719,14 +1719,32 @@ void OSDService::queue_recovery_context(
17191719
e));
17201720
}
17211721

1722-
void OSDService::queue_for_snap_trim(PG *pg)
1722+
void OSDService::queue_for_snap_trim(PG *pg, uint64_t cost_per_object)
17231723
{
17241724
dout(10) << "queueing " << *pg << " for snaptrim" << dendl;
1725+
uint64_t cost_for_queue = [this, cost_per_object] {
1726+
if (cct->_conf->osd_op_queue == "mclock_scheduler") {
1727+
/* The cost calculation is valid for most snap trim iterations except
1728+
* for the following cases:
1729+
* 1) The penultimate iteration which may return 1 object to trim, in
1730+
* which case the cost will be off by a factor equivalent to the
1731+
* average object size, and,
1732+
* 2) The final iteration which returns -ENOENT and performs clean-ups.
1733+
*/
1734+
return cost_per_object * cct->_conf->osd_pg_max_concurrent_snap_trims;
1735+
} else {
1736+
/* We retain this legacy behavior for WeightedPriorityQueue.
1737+
* This branch should be removed after Squid.
1738+
*/
1739+
return cct->_conf->osd_snap_trim_cost;
1740+
}
1741+
}();
1742+
17251743
enqueue_back(
17261744
OpSchedulerItem(
17271745
unique_ptr<OpSchedulerItem::OpQueueable>(
17281746
new PGSnapTrim(pg->get_pgid(), pg->get_osdmap_epoch())),
1729-
cct->_conf->osd_snap_trim_cost,
1747+
cost_for_queue,
17301748
cct->_conf->osd_snap_trim_priority,
17311749
ceph_clock_now(),
17321750
0,

src/osd/OSD.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,7 @@ class OSDService : public Scrub::ScrubSchedListener {
499499
GenContext<ThreadPool::TPHandle&> *c,
500500
uint64_t cost,
501501
int priority);
502-
void queue_for_snap_trim(PG *pg);
502+
void queue_for_snap_trim(PG *pg, uint64_t cost);
503503
void queue_for_scrub(PG* pg, Scrub::scrub_prio_t with_priority);
504504

505505
void queue_scrub_after_repair(PG* pg, Scrub::scrub_prio_t with_priority);

src/osd/PrimaryLogPG.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15605,8 +15605,10 @@ PrimaryLogPG::AwaitAsyncWork::AwaitAsyncWork(my_context ctx)
1560515605
NamedState(nullptr, "Trimming/AwaitAsyncWork")
1560615606
{
1560715607
auto *pg = context< SnapTrimmer >().pg;
15608+
// Determine cost in terms of the average object size
15609+
uint64_t cost_per_object = pg->get_average_object_size();
1560815610
context< SnapTrimmer >().log_enter(state_name);
15609-
context< SnapTrimmer >().pg->osd->queue_for_snap_trim(pg);
15611+
context< SnapTrimmer >().pg->osd->queue_for_snap_trim(pg, cost_per_object);
1561015612
pg->state_set(PG_STATE_SNAPTRIM);
1561115613
pg->state_clear(PG_STATE_SNAPTRIM_ERROR);
1561215614
pg->publish_stats_to_osd();

0 commit comments

Comments
 (0)