Skip to content

Commit d71eea9

Browse files
authored
Merge pull request ceph#51820 from ifed01/wip-ifed-fragmentation-info
os/bluestore: introduce allocator state histogram Reviewed-by: Adam Kupczyk <[email protected]>
2 parents fed7e2f + 5cdd29f commit d71eea9

File tree

4 files changed

+176
-9
lines changed

4 files changed

+176
-9
lines changed

src/os/bluestore/Allocator.cc

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "common/debug.h"
1515
#include "common/admin_socket.h"
1616
#define dout_subsys ceph_subsys_bluestore
17+
using TOPNSPC::common::cmd_getval;
1718

1819
using std::string;
1920
using std::to_string;
@@ -52,6 +53,13 @@ class Allocator::SocketHook : public AdminSocketHook {
5253
this,
5354
"give allocator fragmentation (0-no fragmentation, 1-absolute fragmentation)");
5455
ceph_assert(r == 0);
56+
r = admin_socket->register_command(
57+
("bluestore allocator fragmentation histogram " + name +
58+
" name=alloc_unit,type=CephInt,req=false" +
59+
" name=num_buckets,type=CephInt,req=false").c_str(),
60+
this,
61+
"build allocator free regions state histogram");
62+
ceph_assert(r == 0);
5563
}
5664
}
5765
}
@@ -100,6 +108,39 @@ class Allocator::SocketHook : public AdminSocketHook {
100108
f->open_object_section("fragmentation");
101109
f->dump_float("fragmentation_rating", alloc->get_fragmentation());
102110
f->close_section();
111+
} else if (command == "bluestore allocator fragmentation histogram " + name) {
112+
int64_t alloc_unit = 4096;
113+
cmd_getval(cmdmap, "alloc_unit", alloc_unit);
114+
if (alloc_unit == 0 ||
115+
p2align(alloc_unit, alloc->get_block_size()) != alloc_unit) {
116+
ss << "Invalid allocation unit: '" << alloc_unit
117+
<< ", to be aligned with: '" << alloc->get_block_size()
118+
<< std::endl;
119+
return -EINVAL;
120+
}
121+
int64_t num_buckets = 8;
122+
cmd_getval(cmdmap, "num_buckets", num_buckets);
123+
if (num_buckets < 2) {
124+
ss << "Invalid amount of buckets (min=2): '" << num_buckets
125+
<< std::endl;
126+
return -EINVAL;
127+
}
128+
129+
Allocator::FreeStateHistogram hist;
130+
hist.resize(num_buckets);
131+
alloc->build_free_state_histogram(alloc_unit, hist);
132+
f->open_array_section("extent_counts");
133+
for(int i = 0; i < num_buckets; i++) {
134+
f->open_object_section("c");
135+
f->dump_unsigned("max_len",
136+
hist[i].get_max(i, num_buckets)
137+
);
138+
f->dump_unsigned("total", hist[i].total);
139+
f->dump_unsigned("aligned", hist[i].aligned);
140+
f->dump_unsigned("units", hist[i].alloc_units);
141+
f->close_section();
142+
}
143+
f->close_section();
103144
} else {
104145
ss << "Invalid command" << std::endl;
105146
r = -ENOSYS;
@@ -234,3 +275,51 @@ double Allocator::get_fragmentation_score()
234275
double terrible = (sum / block_size) * get_score(block_size);
235276
return (ideal - score_sum) / (ideal - terrible);
236277
}
278+
279+
void Allocator::build_free_state_histogram(
280+
size_t alloc_unit, Allocator::FreeStateHistogram& hist)
281+
{
282+
auto num_buckets = hist.size();
283+
ceph_assert(num_buckets);
284+
285+
auto base = free_state_hist_bucket::base;
286+
auto base_bits = free_state_hist_bucket::base_bits;
287+
auto mux = free_state_hist_bucket::mux;
288+
// maximum chunk size we track,
289+
// provided by the bucket before the last one
290+
size_t max =
291+
free_state_hist_bucket::get_max(num_buckets - 2, num_buckets);
292+
293+
auto iterated_allocation = [&](size_t off, size_t len) {
294+
size_t idx;
295+
if (len <= base) {
296+
idx = 0;
297+
} else if (len > max) {
298+
idx = num_buckets - 1;
299+
} else {
300+
size_t most_bit = cbits(uint64_t(len-1)) - 1;
301+
idx = 1 + ((most_bit - base_bits) / mux);
302+
}
303+
ceph_assert(idx < num_buckets);
304+
++hist[idx].total;
305+
306+
// now calculate the bucket for the chunk after alignment,
307+
// resulting chunks shorter than alloc_unit are discarded
308+
auto delta = p2roundup(off, alloc_unit) - off;
309+
if (len >= delta + alloc_unit) {
310+
len -= delta;
311+
if (len <= base) {
312+
idx = 0;
313+
} else if (len > max) {
314+
idx = num_buckets - 1;
315+
} else {
316+
size_t most_bit = cbits(uint64_t(len-1)) - 1;
317+
idx = 1 + ((most_bit - base_bits) / mux);
318+
}
319+
++hist[idx].aligned;
320+
hist[idx].alloc_units += len / alloc_unit;
321+
}
322+
};
323+
324+
foreach(iterated_allocation);
325+
}

src/os/bluestore/Allocator.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,37 @@ class Allocator {
8888
return block_size;
8989
}
9090

91+
// The following code builds the Allocator's free extents histogram.
92+
// The histogram is a set of N buckets tracking the extents layout.
93+
// Extent matches a bucket depending on its length using the following
94+
// length spans:
95+
// [0..4K] (4K..16K] (16K..64K] .. (4M..16M] (16M..]
96+
// Each bucket tracks:
97+
// - total amount of extents of specific lengths
98+
// - amount of extents aligned with allocation boundary
99+
// - amount of allocation units in aligned extents
100+
//
101+
// A single bucket of the free extents histogram.
// Bucket 0 covers lengths up to `base`; every following bucket covers a span
// whose upper bound is 2^mux times the previous one; the last bucket is
// unbounded.
struct free_state_hist_bucket {
  // log2 of the upper bound of bucket 0
  static constexpr size_t base_bits = 12;
  // upper bound of bucket 0, in bytes
  static constexpr size_t base = size_t{1} << base_bits;
  // log2 of the growth factor between upper bounds of adjacent buckets
  static constexpr size_t mux = 2;

  size_t total = 0;        // extents whose length matches this bucket
  size_t aligned = 0;      // extents matching this bucket after alignment
  size_t alloc_units = 0;  // whole allocation units in the aligned extents

  // Returns the upper bound (inclusive) of bucket `bucket` in a histogram
  // made of `num_buckets` buckets.
  // The last bucket, any index past it, and any bucket whose bound can't be
  // represented in size_t are reported as unbounded, i.e. the maximum size_t
  // value is returned.
  static constexpr size_t get_max(size_t bucket, size_t num_buckets) {
    constexpr size_t unbounded = std::numeric_limits<size_t>::max();
    // the largest bucket index for which 'base << (mux * bucket)' still fits
    // into size_t without shifting bits out
    constexpr size_t last_shiftable =
      (std::numeric_limits<size_t>::digits - 1 - base_bits) / mux;

    // 'num_buckets == 0' is checked first to keep 'num_buckets - 1' from
    // wrapping around
    if (num_buckets == 0 ||
        bucket >= num_buckets - 1 ||
        bucket > last_shiftable) {
      return unbounded;
    }
    return base << (mux * bucket);
  }
};

// Histogram of free extents, one entry per bucket, ordered by extent length.
using FreeStateHistogram = std::vector<free_state_hist_bucket>;

// Fills `hist` (which must be pre-sized to the desired number of buckets)
// with the statistics on the current free extents, using `alloc_unit` as
// the alignment boundary.
void build_free_state_histogram(size_t alloc_unit, FreeStateHistogram& hist);
121+
91122
private:
92123
class SocketHook;
93124
SocketHook* asok_hook = nullptr;

src/os/bluestore/BlueStore.cc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17763,9 +17763,11 @@ void BlueStore::_log_alerts(osd_alert_list_t& alerts)
1776317763
void BlueStore::_collect_allocation_stats(uint64_t need, uint32_t alloc_size,
1776417764
const PExtentVector& extents)
1776517765
{
17766-
alloc_stats_count++;
17767-
alloc_stats_fragments += extents.size();
17768-
alloc_stats_size += need;
17766+
if (alloc_size != min_alloc_size) {
17767+
alloc_stats_count++;
17768+
alloc_stats_fragments += extents.size();
17769+
alloc_stats_size += need;
17770+
}
1776917771

1777017772
for (auto& e : extents) {
1777117773
logger->hinc(l_bluestore_allocate_hist, e.length, need);

src/test/objectstore/allocator_replay_test.cc

Lines changed: 51 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,15 @@
2020
using namespace std;
2121

2222
// Prints the command line synopsis to stderr.
// `name` is the program name shown at the start of the usage line.
void usage(const std::string &name) {
  std::cerr << "Usage: " << name << " <log_to_replay|free-dump> "
            << "raw_duplicates|"
               "duplicates|"
               "free_dump|"
               "try_alloc <count> <want> <alloc_unit>|"
               "replay_alloc <alloc_list_file>|"
               "export_binary <out_file>|"
               "free_histogram [<alloc_unit>] [<num_buckets>]"
            << std::endl;
}
2533

2634
void usage_replay_alloc(const string &name) {
@@ -548,7 +556,7 @@ int main(int argc, char **argv)
548556
<< std::endl;
549557
std::cout << "Fragmentation score:" << a->get_fragmentation_score()
550558
<< std::endl;
551-
std::cout << "Free:" << std::hex << a->get_free() << std::dec
559+
std::cout << "Free: 0x" << std::hex << a->get_free() << std::dec
552560
<< std::endl;
553561
{
554562
// stub to implement various testing stuff on properly initialized allocator
@@ -574,7 +582,7 @@ int main(int argc, char **argv)
574582
<< std::endl;
575583
std::cout << "Fragmentation score:" << a->get_fragmentation_score()
576584
<< std::endl;
577-
std::cout << "Free:" << std::hex << a->get_free() << std::dec
585+
std::cout << "Free: 0x" << std::hex << a->get_free() << std::dec
578586
<< std::endl;
579587
{
580588
PExtentVector extents;
@@ -606,7 +614,7 @@ int main(int argc, char **argv)
606614
<< std::endl;
607615
std::cout << "Fragmentation score:" << a->get_fragmentation_score()
608616
<< std::endl;
609-
std::cout << "Free:" << std::hex << a->get_free() << std::dec
617+
std::cout << "Free: 0x" << std::hex << a->get_free() << std::dec
610618
<< std::endl;
611619
{
612620
/* replay a set of allocation requests */
@@ -655,7 +663,7 @@ int main(int argc, char **argv)
655663
<< std::endl;
656664
std::cerr << "Fragmentation score:" << a->get_fragmentation_score()
657665
<< std::endl;
658-
std::cerr << "Free:" << std::hex << a->get_free() << std::dec
666+
std::cerr << "Free: 0x" << std::hex << a->get_free() << std::dec
659667
<< std::endl;
660668
/* return 0 if the allocator ran out of space */
661669
if (r == -ENOSPC) {
@@ -681,11 +689,48 @@ int main(int argc, char **argv)
681689
<< std::endl;
682690
std::cout << "Fragmentation score:" << a->get_fragmentation_score()
683691
<< std::endl;
684-
std::cout << "Free:" << std::hex << a->get_free() << std::dec
692+
std::cout << "Free: 0x" << std::hex << a->get_free() << std::dec
685693
<< std::endl;
686694
}
687695
return 0;
688696
});
697+
} else if (strcmp(argv[2], "free_histogram") == 0) {
698+
uint64_t alloc_unit = 4096;
699+
auto num_buckets = 8;
700+
if (argc >= 4) {
701+
alloc_unit = strtoul(argv[3], nullptr, 10);
702+
}
703+
if (argc >= 5) {
704+
num_buckets = strtoul(argv[4], nullptr, 10);
705+
}
706+
return replay_free_dump_and_apply(argv[1],
707+
[&](Allocator *a, const string &aname) {
708+
ceph_assert(a);
709+
std::cout << "Fragmentation:" << a->get_fragmentation()
710+
<< std::endl;
711+
std::cout << "Fragmentation score:" << a->get_fragmentation_score()
712+
<< std::endl;
713+
std::cout << "Free: 0x" << std::hex << a->get_free() << std::dec
714+
<< std::endl;
715+
std::cout << "Allocation unit:" << alloc_unit
716+
<< std::endl;
717+
718+
Allocator::FreeStateHistogram hist;
719+
hist.resize(num_buckets);
720+
a->build_free_state_histogram(alloc_unit, hist);
721+
722+
uint64_t s = 0;
723+
for(int i = 0; i < num_buckets; i++) {
724+
uint64_t e = hist[i].get_max(i, num_buckets);
725+
std::cout << "(" << s << ".." << e << "]"
726+
<< " -> " << hist[i].total
727+
<< " chunks, " << hist[i].aligned << " aligned with "
728+
<< hist[i].alloc_units << " alloc_units."
729+
<< std::endl;
730+
s = e;
731+
}
732+
return 0;
733+
});
689734
} else if (strcmp(argv[2], "export_binary") == 0) {
690735
return export_as_binary(argv[1], argv[3]);
691736
} else if (strcmp(argv[2], "duplicates") == 0) {

0 commit comments

Comments
 (0)