Skip to content

Commit a07a7d1

Browse files
authored
core: Replace sbf with hyperloglog (#5784)
* core: Replace sbf with hyperloglog Signed-off-by: Abhijat Malviya <[email protected]>
1 parent b9ddaee commit a07a7d1

File tree

3 files changed

+54
-52
lines changed

3 files changed

+54
-52
lines changed

src/core/page_usage_stats.cc

Lines changed: 40 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212

1313
#include <string>
1414

15-
#include "core/bloom.h"
16-
1715
extern "C" {
1816
#include <unistd.h>
1917

@@ -30,23 +28,17 @@ using absl::StripTrailingAsciiWhitespace;
3028

3129
namespace {
3230
constexpr auto kUsageHistPoints = std::array{50, 90, 99};
33-
constexpr auto kInitialSBFCap = 1000;
34-
constexpr auto kFProb = 0.001;
35-
constexpr auto kGrowthFactor = 2;
3631
constexpr auto kHistSignificantFigures = 3;
3732

38-
} // namespace
39-
40-
FilterWithSize::FilterWithSize()
41-
: sbf{SBF{kInitialSBFCap, kFProb, kGrowthFactor, PMR_NS::get_default_resource()}}, size{0} {
33+
// Builds and returns an HllBufferPtr backed by a freshly allocated byte array.
// The array length comes from getDenseHllSize(); the buffer is then passed to createDenseHll(),
// and CHECK_EQ requires that call to return 0.
// The returned struct carries a raw pointer to a new[] allocation. In this file the matching
// delete[] calls live in PageUsage::UniquePages::~UniquePages().
HllBufferPtr InitHllPtr() {
34+
HllBufferPtr p;
35+
p.size = getDenseHllSize();
36+
// Raw array allocation: nothing in this function frees it, so the receiver must delete[] p.hll.
p.hll = new uint8_t[p.size];
37+
CHECK_EQ(0, createDenseHll(p));
38+
return p;
4239
}
4340

44-
void FilterWithSize::Add(uintptr_t address) {
45-
const auto s = std::to_string(address);
46-
if (sbf.Add(s)) {
47-
size += 1;
48-
}
49-
}
41+
} // namespace
5042

5143
void CollectedPageStats::Merge(CollectedPageStats&& other, uint16_t shard_id) {
5244
this->pages_scanned += other.pages_scanned;
@@ -98,34 +90,47 @@ std::string CollectedPageStats::ToString() const {
9890
return response;
9991
}
10092

101-
PageUsage::UniquePages::UniquePages() {
93+
// Gives each of the six page-category members its own buffer from InitHllPtr(), then creates the
// usage histogram with hdr_init() and stores it in page_usage_hist.
// CHECK_EQ requires hdr_init() to return 0.
PageUsage::UniquePages::UniquePages()
94+
: pages_scanned{InitHllPtr()},
95+
pages_marked_for_realloc{InitHllPtr()},
96+
pages_full{InitHllPtr()},
97+
pages_reserved_for_malloc{InitHllPtr()},
98+
pages_with_heap_mismatch{InitHllPtr()},
99+
pages_above_threshold{InitHllPtr()} {
102100
hdr_histogram* h = nullptr;
103101
// NOTE(review): the arguments 1 and 100 are presumably the lowest/highest trackable values,
// matching the `perc * 100` recorded in AddStat -- confirm against the hdr_histogram API.
const auto init_result = hdr_init(1, 100, kHistSignificantFigures, &h);
104102
CHECK_EQ(0, init_result) << "failed to initialize histogram";
105103
// Keep the histogram produced by hdr_init(); the destructor hands it to hdr_close().
page_usage_hist = h;
106104
}
107105

108106
// Frees the byte array behind each of the six HllBufferPtr members with delete[] (they are
// allocated with new[] in InitHllPtr()), then passes page_usage_hist to hdr_close().
// NOTE(review): the buffers are held through raw pointers, so a copy of UniquePages would run
// these delete[] calls twice on the same arrays -- confirm copy/move are deleted or never used.
PageUsage::UniquePages::~UniquePages() {
107+
delete[] pages_scanned.hll;
108+
delete[] pages_marked_for_realloc.hll;
109+
delete[] pages_full.hll;
110+
delete[] pages_reserved_for_malloc.hll;
111+
delete[] pages_with_heap_mismatch.hll;
112+
delete[] pages_above_threshold.hll;
109113
hdr_close(page_usage_hist);
110114
}
111115

112116
void PageUsage::UniquePages::AddStat(mi_page_usage_stats_t stat) {
113-
const auto address = stat.page_address;
114-
pages_scanned.Add(address);
117+
const auto data = reinterpret_cast<const unsigned char*>(&stat.page_address);
118+
constexpr size_t size = sizeof(stat.page_address);
119+
pfadd_dense(pages_scanned, data, size);
115120
if (stat.flags == MI_DFLY_PAGE_BELOW_THRESHOLD) {
116-
pages_marked_for_realloc.Add(address);
121+
pfadd_dense(pages_marked_for_realloc, data, size);
117122
} else {
118123
if (stat.flags & MI_DFLY_PAGE_FULL) {
119-
pages_full.Add(address);
124+
pfadd_dense(pages_full, data, size);
120125
} else if (stat.flags & MI_DFLY_HEAP_MISMATCH) {
121-
pages_with_heap_mismatch.Add(address);
126+
pfadd_dense(pages_with_heap_mismatch, data, size);
122127
} else if (stat.flags & MI_DFLY_PAGE_USED_FOR_MALLOC) {
123-
pages_reserved_for_malloc.Add(address);
128+
pfadd_dense(pages_reserved_for_malloc, data, size);
124129
} else {
125130
// We record usage only for pages which have usage above the given threshold but which are not
126131
// full. This allows tuning the threshold for future commands. This also excludes full pages,
127132
// so the only pages here have: threshold < usage% < 100
128-
pages_above_threshold.Add(address);
133+
pfadd_dense(pages_above_threshold, data, size);
129134
const double perc = static_cast<double>(stat.used) / static_cast<double>(stat.capacity);
130135
hdr_record_value(page_usage_hist, perc * 100);
131136
}
@@ -137,16 +142,18 @@ CollectedPageStats PageUsage::UniquePages::CollectedStats() const {
137142
for (const auto p : kUsageHistPoints) {
138143
usage[p] = hdr_value_at_percentile(page_usage_hist, p);
139144
}
140-
return CollectedPageStats{.pages_scanned = pages_scanned.size,
141-
.pages_marked_for_realloc = pages_marked_for_realloc.size,
142-
.pages_full = pages_full.size,
143-
.pages_reserved_for_malloc = pages_reserved_for_malloc.size,
144-
.pages_with_heap_mismatch = pages_with_heap_mismatch.size,
145-
.pages_above_threshold = pages_above_threshold.size,
146-
.objects_skipped_not_required = objects_skipped_not_required,
147-
.objects_skipped_not_supported = objects_skipped_not_supported,
148-
.page_usage_hist = std::move(usage),
149-
.shard_wide_summary = {}};
145+
146+
return CollectedPageStats{
147+
.pages_scanned = static_cast<uint64_t>(pfcountSingle(pages_scanned)),
148+
.pages_marked_for_realloc = static_cast<uint64_t>(pfcountSingle(pages_marked_for_realloc)),
149+
.pages_full = static_cast<uint64_t>(pfcountSingle(pages_full)),
150+
.pages_reserved_for_malloc = static_cast<uint64_t>(pfcountSingle(pages_reserved_for_malloc)),
151+
.pages_with_heap_mismatch = static_cast<uint64_t>(pfcountSingle(pages_with_heap_mismatch)),
152+
.pages_above_threshold = static_cast<uint64_t>(pfcountSingle(pages_above_threshold)),
153+
.objects_skipped_not_required = objects_skipped_not_required,
154+
.objects_skipped_not_supported = objects_skipped_not_supported,
155+
.page_usage_hist = std::move(usage),
156+
.shard_wide_summary = {}};
150157
}
151158

152159
PageUsage::PageUsage(CollectPageStats collect_stats, float threshold)

src/core/page_usage_stats.h

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,16 @@
99
#define MI_BUILD_RELEASE 1
1010
#include <mimalloc/types.h>
1111

12-
#include "core/bloom.h"
12+
extern "C" {
13+
#include "redis/hyperloglog.h"
14+
}
1315

1416
struct hdr_histogram;
1517

1618
namespace dfly {
1719

1820
enum class CollectPageStats : uint8_t { YES, NO };
1921

20-
struct FilterWithSize {
21-
FilterWithSize();
22-
SBF sbf;
23-
size_t size;
24-
void Add(uintptr_t);
25-
};
26-
2722
struct CollectedPageStats {
2823
double threshold{0.0};
2924
uint64_t pages_scanned{0};
@@ -72,12 +67,12 @@ class PageUsage {
7267
float threshold_;
7368

7469
struct UniquePages {
75-
FilterWithSize pages_scanned{};
76-
FilterWithSize pages_marked_for_realloc{};
77-
FilterWithSize pages_full{};
78-
FilterWithSize pages_reserved_for_malloc{};
79-
FilterWithSize pages_with_heap_mismatch{};
80-
FilterWithSize pages_above_threshold{};
70+
HllBufferPtr pages_scanned;
71+
HllBufferPtr pages_marked_for_realloc;
72+
HllBufferPtr pages_full;
73+
HllBufferPtr pages_reserved_for_malloc;
74+
HllBufferPtr pages_with_heap_mismatch;
75+
HllBufferPtr pages_above_threshold;
8176
hdr_histogram* page_usage_hist{};
8277

8378
uint64_t objects_skipped_not_required{0};

src/core/page_usage_stats_test.cc

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,6 @@ TEST_F(PageUsageStatsTest, StatCollection) {
135135
}
136136

137137
constexpr auto page_count_per_flag = 150;
138-
// allow for collisions in filter
139-
constexpr auto expected_min_count = 140;
140138

141139
auto start = 0;
142140
for (const uint8_t flag : {MI_DFLY_PAGE_FULL, MI_DFLY_PAGE_USED_FOR_MALLOC, MI_DFLY_HEAP_MISMATCH,
@@ -156,9 +154,11 @@ TEST_F(PageUsageStatsTest, StatCollection) {
156154
st.Merge(p.CollectedStats(), 1);
157155

158156
EXPECT_GT(st.pages_scanned, 12000);
159-
EXPECT_GT(st.pages_full, expected_min_count);
160-
EXPECT_GT(st.pages_reserved_for_malloc, expected_min_count);
161-
EXPECT_GT(st.pages_marked_for_realloc, expected_min_count);
157+
158+
// Expect a small error margin due to HLL
159+
EXPECT_NEAR(st.pages_full, page_count_per_flag, 5);
160+
EXPECT_NEAR(st.pages_reserved_for_malloc, page_count_per_flag, 5);
161+
EXPECT_NEAR(st.pages_marked_for_realloc, page_count_per_flag, 5);
162162

163163
const auto usage = st.shard_wide_summary;
164164

0 commit comments

Comments
 (0)