@@ -205,6 +205,24 @@ enum {
   num_shards = 1 << num_shard_bits
 };
 
+static size_t pick_a_shard_int() {
+#ifndef _GNU_SOURCE
+  // Dirt cheap, see:
+  //   https://fossies.org/dox/glibc-2.32/pthread__self_8c_source.html
+  size_t me = (size_t)pthread_self();
+  size_t i = (me >> CEPH_PAGE_SHIFT) & ((1 << num_shard_bits) - 1);
+  return i;
+#else
+  // Thread-local storage is actually just an approximation;
+  // what we truly want is _CPU-local storage_.
+  //
+  // On the architectures we care about, sched_getcpu() is a
+  // syscall handled in userspace (vDSO!). It grabs the CPU id
+  // the kernel exposes to a task on context switch.
+  return sched_getcpu() & ((1 << num_shard_bits) - 1);
+#endif
+}
+
 //
 // Align shard to a cacheline.
 //
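
For intuition (an illustrative sketch, not part of the patch): the pthread_self() path boils down to shifting the page offset out of the thread control block address and masking down to the shard count. A standalone approximation, with kPageShift and kShardBits as assumed stand-ins for CEPH_PAGE_SHIFT and num_shard_bits:

#include <pthread.h>
#include <cstddef>
#include <cstdio>

static constexpr size_t kPageShift = 12;  // stand-in: 4 KiB pages assumed
static constexpr size_t kShardBits = 5;   // stand-in: 32 shards assumed

static size_t pick() {
  size_t me = (size_t)pthread_self();     // TCB address on glibc
  return (me >> kPageShift) & ((size_t{1} << kShardBits) - 1);
}

static void *worker(void *) {
  std::printf("this thread maps to shard %zu\n", pick());
  return nullptr;
}

int main() {                              // build with: g++ -pthread
  pthread_t t[4];
  for (auto &th : t) pthread_create(&th, nullptr, worker, nullptr);
  for (auto &th : t) pthread_join(&th, nullptr);
  return 0;
}
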
@@ -244,9 +262,16 @@ const char *get_pool_name(pool_index_t ix);
 struct type_t {
   const char *type_name;
   size_t item_size;
-  ceph::atomic<ssize_t> items = {0}; // signed
+  struct type_shard_t {
+    ceph::atomic<ssize_t> items = {0}; // signed
+    char __padding[128 - sizeof(ceph::atomic<ssize_t>)];
+  } __attribute__((aligned(128)));
+  type_shard_t shards[num_shards];
 };
 
+static_assert(sizeof(type_t::type_shard_t) == 128,
+              "type_shard_t should be cacheline-sized");
+
 struct type_info_hash {
   std::size_t operator()(const std::type_info& k) const {
     return k.hash_code();
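
Why the 128-byte shards: each type_shard_t occupies its own 128-byte slot, so two threads bumping counters on different shards never write to the same cacheline (no false sharing); 128 covers the common 64-byte line plus adjacent-line prefetching. A hedged sketch of the same layout in isolation (padded_counter and total_items are hypothetical names, not patch API):

#include <atomic>
#include <cstddef>
#include <sys/types.h>  // ssize_t

struct alignas(128) padded_counter {
  std::atomic<ssize_t> items{0};
  // alignas(128) already rounds sizeof up to 128; the patch spells the
  // padding out with a char array, which its static_assert then verifies.
};
static_assert(sizeof(padded_counter) == 128, "one counter per 128B slot");

// Stat readers sum over the shards; a single shard's value is not
// meaningful on its own.
template <std::size_t N>
ssize_t total_items(const padded_counter (&shards)[N]) {
  ssize_t sum = 0;
  for (const auto &s : shards)
    sum += s.items.load(std::memory_order_relaxed);
  return sum;
}
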
@@ -259,6 +284,8 @@ class pool_t {
   mutable std::mutex lock; // only used for types list
   std::unordered_map<const char *, type_t> type_map;
 
+  template<pool_index_t, typename T>
+  friend class pool_allocator;
 public:
   //
   // How much this pool consumes. O(<num_shards>)
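
The friend declaration exists because pick_a_shard() is removed below: pool_allocator now indexes the private pool->shard array itself. A minimal sketch of the pattern (all names hypothetical):

#include <atomic>
#include <cstddef>
#include <sys/types.h>

class pool {
  std::atomic<ssize_t> shard_bytes[32] = {};  // private, like pool_t::shard

  template <typename T>
  friend class allocator;  // allocator<T> may touch the private shards
};

template <typename T>
class allocator {
  pool *p;
public:
  explicit allocator(pool *pp) : p(pp) {}
  void add_bytes(std::size_t shid, ssize_t delta) {
    p->shard_bytes[shid] += delta;  // compiles only thanks to the friendship
  }
};
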
@@ -268,29 +295,6 @@ class pool_t {
 
   void adjust_count(ssize_t items, ssize_t bytes);
 
-  static size_t pick_a_shard_int() {
-#ifndef _GNU_SOURCE
-    // Dirt cheap, see:
-    //   https://fossies.org/dox/glibc-2.32/pthread__self_8c_source.html
-    size_t me = (size_t)pthread_self();
-    size_t i = (me >> CEPH_PAGE_SHIFT) & ((1 << num_shard_bits) - 1);
-    return i;
-#else
-    // a thread local storage is actually just an approximation;
-    // what we truly want is a _cpu local storage_.
-    //
-    // on the architectures we care about sched_getcpu() is
-    // a syscall-handled-in-userspace (vdso!). it grabs the cpu
-    // id kernel exposes to a task on context switch.
-    return sched_getcpu() & ((1 << num_shard_bits) - 1);
-#endif
-  }
-
-  shard_t* pick_a_shard() {
-    size_t i = pick_a_shard_int();
-    return &shard[i];
-  }
-
   type_t *get_type(const std::type_info& ti, size_t size) {
     std::lock_guard<std::mutex> l(lock);
     auto p = type_map.find(ti.name());
@@ -353,34 +357,37 @@ class pool_allocator {
 
   T* allocate(size_t n, void *p = nullptr) {
     size_t total = sizeof(T) * n;
-    shard_t *shard = pool->pick_a_shard();
-    shard->bytes += total;
-    shard->items += n;
+    const auto shid = pick_a_shard_int();
+    auto& shard = pool->shard[shid];
+    shard.bytes += total;
+    shard.items += n;
     if (type) {
-      type->items += n;
+      type->shards[shid].items += n;
     }
     T* r = reinterpret_cast<T*>(new char[total]);
     return r;
   }
 
   void deallocate(T* p, size_t n) {
     size_t total = sizeof(T) * n;
-    shard_t *shard = pool->pick_a_shard();
-    shard->bytes -= total;
-    shard->items -= n;
+    const auto shid = pick_a_shard_int();
+    auto& shard = pool->shard[shid];
+    shard.bytes -= total;
+    shard.items -= n;
     if (type) {
-      type->items -= n;
+      type->shards[shid].items -= n;
     }
     delete[] reinterpret_cast<char*>(p);
   }
 
   T* allocate_aligned(size_t n, size_t align, void *p = nullptr) {
     size_t total = sizeof(T) * n;
-    shard_t *shard = pool->pick_a_shard();
-    shard->bytes += total;
-    shard->items += n;
+    const auto shid = pick_a_shard_int();
+    auto& shard = pool->shard[shid];
+    shard.bytes += total;
+    shard.items += n;
     if (type) {
-      type->items += n;
+      type->shards[shid].items += n;
     }
     char *ptr;
     int rc = ::posix_memalign((void**)(void*)&ptr, align, total);
@@ -392,11 +399,12 @@
 
   void deallocate_aligned(T* p, size_t n) {
     size_t total = sizeof(T) * n;
-    shard_t *shard = pool->pick_a_shard();
-    shard->bytes -= total;
-    shard->items -= n;
+    const auto shid = pick_a_shard_int();
+    auto& shard = pool->shard[shid];
+    shard.bytes -= total;
+    shard.items -= n;
     if (type) {
-      type->items -= n;
+      type->shards[shid].items -= n;
     }
     aligned_free(p);
   }
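
A subtlety worth noting, and the reason the counters are signed (ssize_t, per the "// signed" comment): the shard is re-picked on every call, so a deallocation can land on a different shard than the matching allocation whenever a different thread (or CPU) frees the memory. Individual shard counters may legitimately go negative; only the sum across shards is meaningful. A tiny sketch of that invariant:

#include <atomic>
#include <cstdio>
#include <sys/types.h>

int main() {
  std::atomic<ssize_t> items[8] = {};
  items[3] += 1;  // allocate() ran on a thread that mapped to shard 3
  items[7] -= 1;  // deallocate() of that object ran on a thread on shard 7
  ssize_t sum = 0;
  for (auto &s : items) sum += s.load();
  std::printf("shard3=%zd shard7=%zd total=%zd\n",
              items[3].load(), items[7].load(), sum);  // prints 1 -1 0
  return 0;
}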