mempool: implement sharding for type_t::items

rzarzynski · cyx1231st · commit fd05b5a1bcc9 · 2023-11-23T09:53:06.000+08:00
It looks like this counter was initially intended to be a debug one,
but -- because of the `force_register` flag -- it is widely
used now.

Found by Yingxin Cheng <yingxin.cheng@intel.com>

Signed-off-by: Radosław Zarzyński <rzarzyns@redhat.com>
diff --git a/src/common/mempool.cc b/src/common/mempool.cc
@@ -127,8 +127,12 @@ void mempool::pool_t::get_stats(
     for (auto &p : type_map) {
       std::string n = ceph_demangle(p.second.type_name);
       stats_t &s = (*by_type)[n];
-      s.bytes = p.second.items * p.second.item_size;
-      s.items = p.second.items;
+      s.bytes = 0;
+      s.items = 0;
+      for (size_t i = 0 ; i < num_shards; ++i) {
+        s.bytes += p.second.shards[i].items * p.second.item_size;
+        s.items += p.second.shards[i].items;
+      }
     }
   }
 }
diff --git a/src/include/mempool.h b/src/include/mempool.h
@@ -205,6 +205,24 @@ enum {
   num_shards = 1 << num_shard_bits
 };
 
+static size_t pick_a_shard_int() {
+#ifndef _GNU_SOURCE
+  // Dirt cheap: fold the pthread_self() value into a shard index, see:
+  //   https://fossies.org/dox/glibc-2.32/pthread__self_8c_source.html
+  size_t me = (size_t)pthread_self();
+  size_t i = (me >> CEPH_PAGE_SHIFT) & ((1 << num_shard_bits) - 1);
+  return i;
+#else
+  // thread-local storage is actually just an approximation;
+  // what we truly want is _cpu-local storage_.
+  //
+  // on the architectures we care about, sched_getcpu() is
+  // a syscall handled in userspace (vdso!). it grabs the cpu
+  // id the kernel exposes to a task on context switch.
+  return sched_getcpu() & ((1 << num_shard_bits) - 1);
+#endif
+}
+
 //
 // Align shard to a cacheline.
 //
@@ -244,9 +262,16 @@ const char *get_pool_name(pool_index_t ix);
 struct type_t {
   const char *type_name;
   size_t item_size;
-  ceph::atomic<ssize_t> items = {0};  // signed
+  struct type_shard_t {
+    ceph::atomic<ssize_t> items = {0}; // signed
+    char __padding[128 - sizeof(ceph::atomic<ssize_t>)];
+  } __attribute__ ((aligned (128)));
+  type_shard_t shards[num_shards];
 };
 
+static_assert(sizeof(type_t::type_shard_t) == 128,
+	      "type_shard_t should be cacheline-sized");
+
 struct type_info_hash {
   std::size_t operator()(const std::type_info& k) const {
     return k.hash_code();
@@ -259,6 +284,8 @@ class pool_t {
   mutable std::mutex lock;  // only used for types list
   std::unordered_map<const char *, type_t> type_map;
 
+  template<pool_index_t, typename T>
+  friend class pool_allocator;
 public:
   //
   // How much this pool consumes. O(<num_shards>)
@@ -268,29 +295,6 @@ class pool_t {
 
   void adjust_count(ssize_t items, ssize_t bytes);
 
-  static size_t pick_a_shard_int() {
-#ifndef _GNU_SOURCE
-    // Dirt cheap, see:
-    //   https://fossies.org/dox/glibc-2.32/pthread__self_8c_source.html
-    size_t me = (size_t)pthread_self();
-    size_t i = (me >> CEPH_PAGE_SHIFT) & ((1 << num_shard_bits) - 1);
-    return i;
-#else
-    // a thread local storage is actually just an approximation;
-    // what we truly want is a _cpu local storage_.
-    //
-    // on the architectures we care about sched_getcpu() is
-    // a syscall-handled-in-userspace (vdso!). it grabs the cpu
-    // id kernel exposes to a task on context switch.
-    return sched_getcpu() & ((1 << num_shard_bits) - 1);
-#endif
-  }
-
-  shard_t* pick_a_shard() {
-    size_t i = pick_a_shard_int();
-    return &shard[i];
-  }
-
   type_t *get_type(const std::type_info& ti, size_t size) {
     std::lock_guard<std::mutex> l(lock);
     auto p = type_map.find(ti.name());
@@ -353,34 +357,37 @@ class pool_allocator {
 
   T* allocate(size_t n, void *p = nullptr) {
     size_t total = sizeof(T) * n;
-    shard_t *shard = pool->pick_a_shard();
-    shard->bytes += total;
-    shard->items += n;
+    const auto shid = pick_a_shard_int();
+    auto& shard = pool->shard[shid];
+    shard.bytes += total;
+    shard.items += n;
     if (type) {
-      type->items += n;
+      type->shards[shid].items += n;
     }
     T* r = reinterpret_cast<T*>(new char[total]);
     return r;
   }
 
   void deallocate(T* p, size_t n) {
     size_t total = sizeof(T) * n;
-    shard_t *shard = pool->pick_a_shard();
-    shard->bytes -= total;
-    shard->items -= n;
+    const auto shid = pick_a_shard_int();
+    auto& shard = pool->shard[shid];
+    shard.bytes -= total;
+    shard.items -= n;
     if (type) {
-      type->items -= n;
+      type->shards[shid].items -= n;
     }
     delete[] reinterpret_cast<char*>(p);
   }
 
   T* allocate_aligned(size_t n, size_t align, void *p = nullptr) {
     size_t total = sizeof(T) * n;
-    shard_t *shard = pool->pick_a_shard();
-    shard->bytes += total;
-    shard->items += n;
+    const auto shid = pick_a_shard_int();
+    auto& shard = pool->shard[shid];
+    shard.bytes += total;
+    shard.items += n;
     if (type) {
-      type->items += n;
+      type->shards[shid].items += n;
     }
     char *ptr;
     int rc = ::posix_memalign((void**)(void*)&ptr, align, total);
@@ -392,11 +399,12 @@ class pool_allocator {
 
   void deallocate_aligned(T* p, size_t n) {
     size_t total = sizeof(T) * n;
-    shard_t *shard = pool->pick_a_shard();
-    shard->bytes -= total;
-    shard->items -= n;
+    const auto shid = pick_a_shard_int();
+    auto& shard = pool->shard[shid];
+    shard.bytes -= total;
+    shard.items -= n;
     if (type) {
-      type->items -= n;
+      type->shards[shid].items -= n;
     }
     aligned_free(p);
   }
diff --git a/src/test/test_c2c.cc b/src/test/test_c2c.cc
@@ -70,7 +70,7 @@ int main(int argc, const char **argv)
 	  while(1) {
 	    size_t i;
 	    if (sharding) {
-	      i = mempool::pool_t::pick_a_shard_int();
+	      i = mempool::pick_a_shard_int();
 	    } else {
 	      i = 0;
 	    }

Original file line number	Diff line number	Diff line change
`@@ -127,8 +127,12 @@ void mempool::pool_t::get_stats(`
`127`	`127`	`for (auto &p : type_map) {`
`128`	`128`	`std::string n = ceph_demangle(p.second.type_name);`
`129`	`129`	`stats_t &s = (*by_type)[n];`
`130`		`- s.bytes = p.second.items * p.second.item_size;`
`131`		`- s.items = p.second.items;`
	`130`	`+ s.bytes = 0;`
	`131`	`+ s.items = 0;`
	`132`	`+ for (size_t i = 0 ; i < num_shards; ++i) {`
	`133`	`+ s.bytes += p.second.shards[i].items * p.second.item_size;`
	`134`	`+ s.items += p.second.shards[i].items;`
	`135`	`+ }`
`132`	`136`	`}`
`133`	`137`	`}`
`134`	`138`	`}`
Original file line number	Diff line number	Diff line change
`@@ -70,7 +70,7 @@ int main(int argc, const char **argv)`
`70`	`70`	`while(1) {`
`71`	`71`	`size_t i;`
`72`	`72`	`if (sharding) {`
`73`		`- i = mempool::pool_t::pick_a_shard_int();`
	`73`	`+ i = mempool::pick_a_shard_int();`
`74`	`74`	`} else {`
`75`	`75`	`i = 0;`
`76`	`76`	`}`