Skip to content

Commit 3301618

Browse files
authored
Merge pull request ceph#53130 from cyx1231st/wip-crimson-drop-mempool
mempool: avoid true sharing for the counters, with crimson Reviewed-by: Radoslaw Zarzynski <[email protected]> Reviewed-by: Chunmei Liu <[email protected]> Reviewed-by: Matan Breizman <[email protected]> Reviewed-by: Laura Flores <[email protected]>
2 parents e324cbf + 931894f commit 3301618

File tree

4 files changed

+96
-30
lines changed

4 files changed

+96
-30
lines changed

src/common/mempool.cc

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,12 @@
1515
#include "include/mempool.h"
1616
#include "include/demangle.h"
1717

18+
#if defined(_GNU_SOURCE) && defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
19+
#else
1820
// Thread local variables should save index, not &shard[index],
1921
// because shard[] is defined in the class
2022
static thread_local size_t thread_shard_index = mempool::num_shards;
23+
#endif
2124

2225
// default to debug_mode off
2326
bool mempool::debug_mode = false;
@@ -95,9 +98,21 @@ size_t mempool::pool_t::allocated_items() const
9598

9699
void mempool::pool_t::adjust_count(ssize_t items, ssize_t bytes)
97100
{
98-
thread_shard_index = (thread_shard_index == num_shards) ? pick_a_shard_int() : thread_shard_index;
99-
shard[thread_shard_index].items += items;
100-
shard[thread_shard_index].bytes += bytes;
101+
#if defined(_GNU_SOURCE) && defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
102+
// the expected path: we alway pick the shard for a cpu core
103+
// a thread is executing on.
104+
const size_t shard_index = pick_a_shard_int();
105+
#else
106+
// fallback for lack of sched_getcpu()
107+
const size_t shard_index = []() {
108+
if (thread_shard_index == num_shards) {
109+
thread_shard_index = pick_a_shard_int();
110+
}
111+
return thread_shard_index;
112+
}();
113+
#endif
114+
shard[shard_index].items += items;
115+
shard[shard_index].bytes += bytes;
101116
}
102117

103118
void mempool::pool_t::get_stats(
@@ -113,8 +128,17 @@ void mempool::pool_t::get_stats(
113128
for (auto &p : type_map) {
114129
std::string n = ceph_demangle(p.second.type_name);
115130
stats_t &s = (*by_type)[n];
131+
#if defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
132+
s.bytes = 0;
133+
s.items = 0;
134+
for (size_t i = 0 ; i < num_shards; ++i) {
135+
s.bytes += p.second.shards[i].items * p.second.item_size;
136+
s.items += p.second.shards[i].items;
137+
}
138+
#else
116139
s.bytes = p.second.items * p.second.item_size;
117140
s.items = p.second.items;
141+
#endif
118142
}
119143
}
120144
}

src/include/mempool.h

Lines changed: 67 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626
#include <boost/container/flat_set.hpp>
2727
#include <boost/container/flat_map.hpp>
2828

29+
#if defined(_GNU_SOURCE) && defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
30+
# include <sched.h>
31+
#endif
32+
2933
#include "common/Formatter.h"
3034
#include "common/ceph_atomic.h"
3135
#include "include/ceph_assert.h"
@@ -201,6 +205,24 @@ enum {
201205
num_shards = 1 << num_shard_bits
202206
};
203207

208+
static size_t pick_a_shard_int() {
209+
#if defined(_GNU_SOURCE) && defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
210+
// a thread local storage is actually just an approximation;
211+
// what we truly want is a _cpu local storage_.
212+
//
213+
// on the architectures we care about sched_getcpu() is
214+
// a syscall-handled-in-userspace (vdso!). it grabs the cpu
215+
// id kernel exposes to a task on context switch.
216+
return sched_getcpu() & ((1 << num_shard_bits) - 1);
217+
#else
218+
// Dirt cheap, see:
219+
// https://fossies.org/dox/glibc-2.32/pthread__self_8c_source.html
220+
size_t me = (size_t)pthread_self();
221+
size_t i = (me >> CEPH_PAGE_SHIFT) & ((1 << num_shard_bits) - 1);
222+
return i;
223+
#endif
224+
}
225+
204226
//
205227
// Align shard to a cacheline.
206228
//
@@ -240,7 +262,18 @@ const char *get_pool_name(pool_index_t ix);
240262
struct type_t {
241263
const char *type_name;
242264
size_t item_size;
265+
#if defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
266+
struct type_shard_t {
267+
ceph::atomic<ssize_t> items = {0}; // signed
268+
char __padding[128 - sizeof(ceph::atomic<ssize_t>)];
269+
} __attribute__ ((aligned (128)));
270+
static_assert(sizeof(type_shard_t) == 128,
271+
"type_shard_t should be cacheline-sized");
272+
type_shard_t shards[num_shards];
273+
#else
274+
// XXX: consider dropping this case for classic with perf tests
243275
ceph::atomic<ssize_t> items = {0}; // signed
276+
#endif
244277
};
245278

246279
struct type_info_hash {
@@ -255,6 +288,8 @@ class pool_t {
255288
mutable std::mutex lock; // only used for types list
256289
std::unordered_map<const char *, type_t> type_map;
257290

291+
template<pool_index_t, typename T>
292+
friend class pool_allocator;
258293
public:
259294
//
260295
// How much this pool consumes. O(<num_shards>)
@@ -264,19 +299,6 @@ class pool_t {
264299

265300
void adjust_count(ssize_t items, ssize_t bytes);
266301

267-
static size_t pick_a_shard_int() {
268-
// Dirt cheap, see:
269-
// https://fossies.org/dox/glibc-2.32/pthread__self_8c_source.html
270-
size_t me = (size_t)pthread_self();
271-
size_t i = (me >> CEPH_PAGE_SHIFT) & ((1 << num_shard_bits) - 1);
272-
return i;
273-
}
274-
275-
shard_t* pick_a_shard() {
276-
size_t i = pick_a_shard_int();
277-
return &shard[i];
278-
}
279-
280302
type_t *get_type(const std::type_info& ti, size_t size) {
281303
std::lock_guard<std::mutex> l(lock);
282304
auto p = type_map.find(ti.name());
@@ -339,34 +361,49 @@ class pool_allocator {
339361

340362
T* allocate(size_t n, void *p = nullptr) {
341363
size_t total = sizeof(T) * n;
342-
shard_t *shard = pool->pick_a_shard();
343-
shard->bytes += total;
344-
shard->items += n;
364+
const auto shid = pick_a_shard_int();
365+
auto& shard = pool->shard[shid];
366+
shard.bytes += total;
367+
shard.items += n;
345368
if (type) {
369+
#if defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
370+
type->shards[shid].items += n;
371+
#else
346372
type->items += n;
373+
#endif
347374
}
348375
T* r = reinterpret_cast<T*>(new char[total]);
349376
return r;
350377
}
351378

352379
void deallocate(T* p, size_t n) {
353380
size_t total = sizeof(T) * n;
354-
shard_t *shard = pool->pick_a_shard();
355-
shard->bytes -= total;
356-
shard->items -= n;
381+
const auto shid = pick_a_shard_int();
382+
auto& shard = pool->shard[shid];
383+
shard.bytes -= total;
384+
shard.items -= n;
357385
if (type) {
386+
#if defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
387+
type->shards[shid].items -= n;
388+
#else
358389
type->items -= n;
390+
#endif
359391
}
360392
delete[] reinterpret_cast<char*>(p);
361393
}
362394

363395
T* allocate_aligned(size_t n, size_t align, void *p = nullptr) {
364396
size_t total = sizeof(T) * n;
365-
shard_t *shard = pool->pick_a_shard();
366-
shard->bytes += total;
367-
shard->items += n;
397+
const auto shid = pick_a_shard_int();
398+
auto& shard = pool->shard[shid];
399+
shard.bytes += total;
400+
shard.items += n;
368401
if (type) {
402+
#if defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
403+
type->shards[shid].items += n;
404+
#else
369405
type->items += n;
406+
#endif
370407
}
371408
char *ptr;
372409
int rc = ::posix_memalign((void**)(void*)&ptr, align, total);
@@ -378,11 +415,16 @@ class pool_allocator {
378415

379416
void deallocate_aligned(T* p, size_t n) {
380417
size_t total = sizeof(T) * n;
381-
shard_t *shard = pool->pick_a_shard();
382-
shard->bytes -= total;
383-
shard->items -= n;
418+
const auto shid = pick_a_shard_int();
419+
auto& shard = pool->shard[shid];
420+
shard.bytes -= total;
421+
shard.items -= n;
384422
if (type) {
423+
#if defined(WITH_SEASTAR) && !defined(WITH_ALIEN)
424+
type->shards[shid].items -= n;
425+
#else
385426
type->items -= n;
427+
#endif
386428
}
387429
aligned_free(p);
388430
}

src/test/test_c2c.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ int main(int argc, const char **argv)
7070
while(1) {
7171
size_t i;
7272
if (sharding) {
73-
i = mempool::pool_t::pick_a_shard_int();
73+
i = mempool::pick_a_shard_int();
7474
} else {
7575
i = 0;
7676
}

src/test/test_mempool.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,7 @@ TEST(mempool, check_shard_select)
412412
for (size_t i = 0; i < samples; i++) {
413413
workers.push_back(
414414
std::thread([&](){
415-
size_t i = mempool::pool_t::pick_a_shard_int();
415+
size_t i = mempool::pick_a_shard_int();
416416
shards[i]++;
417417
}));
418418
}

0 commit comments

Comments
 (0)