Skip to content

Commit 27f2db0

Browse files
authored
Merge pull request ceph#63437 from ronen-fr/wip-rf-63379-tentacle
tentacle: osd/scrub: remove OsdScrub::LoadTracker

Reviewed-by: Radoslaw Zarzynski <[email protected]>
2 parents 349c77a + c68c2a5 commit 27f2db0

File tree

3 files changed

+48
-99
lines changed

3 files changed

+48
-99
lines changed

src/osd/OSD.cc

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6274,12 +6274,9 @@ void OSD::heartbeat_check()
62746274
void OSD::heartbeat()
62756275
{
62766276
ceph_assert(ceph_mutex_is_locked_by_me(heartbeat_lock));
6277-
dout(30) << "heartbeat" << dendl;
6278-
6279-
auto load_for_logger = service.get_scrub_services().update_load_average();
6280-
if (load_for_logger) {
6281-
logger->set(l_osd_loadavg, load_for_logger.value());
6282-
}
6277+
logger->set(
6278+
l_osd_loadavg,
6279+
100.0 * service.get_scrub_services().update_load_average().value_or(0.0));
62836280
dout(30) << "heartbeat checking stats" << dendl;
62846281

62856282
// refresh peer list and osd stats

src/osd/scrubber/osd_scrub.cc

Lines changed: 24 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ OsdScrub::OsdScrub(
3838
, m_resource_bookkeeper{[this](std::string msg) { log_fwd(msg); }, conf}
3939
, m_queue{cct, m_osd_svc}
4040
, m_log_prefix{fmt::format("osd.{} osd-scrub:", m_osd_svc.get_nodeid())}
41-
, m_load_tracker{cct, conf, m_osd_svc.get_nodeid()}
4241
{
4342
create_scrub_perf_counters();
4443
}
@@ -211,7 +210,7 @@ Scrub::OSDRestrictions OsdScrub::restrictions_on_scrubbing(
211210
}
212211

213212
env_conditions.restricted_time = !scrub_time_permit(scrub_clock_now);
214-
env_conditions.cpu_overloaded = !m_load_tracker.scrub_load_below_threshold();
213+
env_conditions.cpu_overloaded = !scrub_load_below_threshold();
215214

216215
return env_conditions;
217216
}
@@ -263,89 +262,51 @@ void OsdScrub::on_config_change()
263262
}
264263
}
265264

265+
266266
// ////////////////////////////////////////////////////////////////////////// //
267267
// CPU load tracking and related
268268

269-
OsdScrub::LoadTracker::LoadTracker(
270-
CephContext* cct,
271-
const ceph::common::ConfigProxy& config,
272-
int node_id)
273-
: cct{cct}
274-
, conf{config}
275-
, log_prefix{fmt::format("osd.{} scrub-queue::load-tracker::", node_id)}
276-
{
277-
// initialize the daily loadavg with current 15min loadavg
278-
if (double loadavgs[3]; getloadavg(loadavgs, 3) == 3) {
279-
daily_loadavg = loadavgs[2];
280-
} else {
281-
derr << "OSD::init() : couldn't read loadavgs\n" << dendl;
282-
daily_loadavg = 1.0;
283-
}
284-
}
285-
286-
///\todo replace with Knuth's algo (to reduce the numerical error)
287-
std::optional<double> OsdScrub::LoadTracker::update_load_average()
269+
std::optional<double> OsdScrub::update_load_average()
288270
{
289-
auto hb_interval = conf->osd_heartbeat_interval;
290-
int n_samples = std::chrono::duration_cast<seconds>(24h).count();
291-
if (hb_interval > 1) {
292-
n_samples = std::max(n_samples / hb_interval, 1L);
293-
}
271+
// cache the number of CPUs
272+
loadavg_cpu_count = std::max(sysconf(_SC_NPROCESSORS_ONLN), 1L);
294273

295274
double loadavg;
296-
if (getloadavg(&loadavg, 1) == 1) {
297-
loadavg_1min = loadavg;
298-
daily_loadavg = (daily_loadavg * (n_samples - 1) + loadavg) / n_samples;
299-
return 100 * loadavg;
275+
if (getloadavg(&loadavg, 1) != 1) {
276+
return std::nullopt;
300277
}
301-
302-
// getloadavg() failed
303-
loadavg_1min = 0;
304-
return std::nullopt;
278+
return loadavg;
305279
}
306280

307-
bool OsdScrub::LoadTracker::scrub_load_below_threshold() const
281+
282+
bool OsdScrub::scrub_load_below_threshold() const
308283
{
309-
// if the 1-min load average - even before dividing by the number of CPUs -
310-
// is below the configured threshold, scrubs are allowed. No need to call
311-
// sysconf().
312-
if (loadavg_1min < conf->osd_scrub_load_threshold) {
313-
dout(20) << fmt::format(
314-
"loadavg {:.3f} < max {:.3f} = yes",
315-
loadavg_1min, conf->osd_scrub_load_threshold)
316-
<< dendl;
317-
return true;
284+
// fetch an up-to-date load average.
285+
// For the number of CPUs - rely on the last known value, fetched in the
286+
// 'heartbeat' thread.
287+
double loadavg;
288+
if (getloadavg(&loadavg, 1) != 1) {
289+
loadavg = 0;
318290
}
319291

320-
// check the load per CPU
321-
const long cpus = sysconf(_SC_NPROCESSORS_ONLN);
322-
const double loadavg_per_cpu = cpus > 0 ? loadavg_1min / cpus : loadavg_1min;
292+
const double loadavg_per_cpu = loadavg / loadavg_cpu_count;
323293
if (loadavg_per_cpu < conf->osd_scrub_load_threshold) {
324294
dout(20) << fmt::format(
325-
"loadavg per cpu {:.3f} < max {:.3f} (#CPUs: {}) = yes",
326-
loadavg_per_cpu, conf->osd_scrub_load_threshold, cpus)
295+
"loadavg per cpu {:.3f} < max {:.3f} (#CPUs:{}) = yes",
296+
loadavg_per_cpu, conf->osd_scrub_load_threshold,
297+
loadavg_cpu_count)
327298
<< dendl;
328299
return true;
329300
}
330301

331-
dout(10) << fmt::format(
332-
"loadavg {:.3f} >= max {:.3f} (#CPUs: {}) = no", loadavg_1min,
333-
conf->osd_scrub_load_threshold, cpus)
302+
dout(5) << fmt::format(
303+
"loadavg {:.3f} >= max {:.3f} (#CPUs:{}) = no",
304+
loadavg_per_cpu, conf->osd_scrub_load_threshold,
305+
loadavg_cpu_count)
334306
<< dendl;
335307
return false;
336308
}
337309

338-
std::ostream& OsdScrub::LoadTracker::gen_prefix(
339-
std::ostream& out,
340-
std::string_view fn) const
341-
{
342-
return out << log_prefix << fn << ": ";
343-
}
344-
345-
std::optional<double> OsdScrub::update_load_average()
346-
{
347-
return m_load_tracker.update_load_average();
348-
}
349310

350311
// ////////////////////////////////////////////////////////////////////////// //
351312

src/osd/scrubber/osd_scrub.h

Lines changed: 21 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -118,13 +118,13 @@ class OsdScrub {
118118
[[nodiscard]] bool scrub_time_permit(utime_t t) const;
119119

120120
/**
121-
* An external interface into the LoadTracker object. Used by
122-
* the OSD tick to update the load data in the logger.
121+
* Fetch the 1-minute load average. Used by
122+
* the OSD heartbeat handler to update a performance counter.
123+
* Also updates the number of CPUs, required internally by the
124+
* scrub queue.
123125
*
124-
* \returns 100*(the decaying (running) average of the CPU load
125-
* over the last 24 hours) or nullopt if the load is not
126-
* available.
127-
* Note that the multiplication by 100 is required by the logger interface
126+
* \returns the 1-minute element of getloadavg() or nullopt
127+
* if the load is not available.
128128
*/
129129
std::optional<double> update_load_average();
130130

@@ -195,31 +195,22 @@ class OsdScrub {
195195
*/
196196
bool scrub_random_backoff() const;
197197

198-
/**
199-
* tracking the average load on the CPU. Used both by the
200-
* OSD logger, and by the scrub queue (as no scrubbing is allowed if
201-
* the load is too high).
198+
// tracking the CPU load
199+
// ---------------------------------------------------------------
200+
201+
/*
202+
* tracking the average load on the CPU. Used both by the OSD performance
203+
* counters logger, and by the scrub queue (as no periodic scrubbing is
204+
* allowed if the load is too high).
202205
*/
203-
class LoadTracker {
204-
CephContext* cct;
205-
const ceph::common::ConfigProxy& conf;
206-
const std::string log_prefix;
207-
double daily_loadavg{0.0};
208-
double loadavg_1min{0.0};
209-
210-
public:
211-
explicit LoadTracker(
212-
CephContext* cct,
213-
const ceph::common::ConfigProxy& config,
214-
int node_id);
215-
216-
std::optional<double> update_load_average();
217-
218-
[[nodiscard]] bool scrub_load_below_threshold() const;
219-
220-
std::ostream& gen_prefix(std::ostream& out, std::string_view fn) const;
221-
};
222-
LoadTracker m_load_tracker;
206+
207+
/// the number of CPUs
208+
long loadavg_cpu_count{1};
209+
210+
/// true if the load average (the 1-minute system average divided by
211+
/// the number of CPUs) is below the configured threshold
212+
bool scrub_load_below_threshold() const;
213+
223214

224215
// the scrub performance counters collections
225216
// ---------------------------------------------------------------

0 commit comments

Comments
 (0)