Skip to content

Commit 14ef95b

Browse files
mjguzik authored and dennisszhou committed
kernel/fork: group allocation/free of per-cpu counters for mm struct
A trivial execve scalability test which tries to be very friendly (statically linked binaries, all separate) is predominantly bottlenecked by back-to-back per-cpu counter allocations which serialize on global locks.

Ease the pain by allocating and freeing them in one go.

Bench can be found here: http://apollo.backplane.com/DFlyMisc/doexec.c

$ cc -static -O2 -o static-doexec doexec.c
$ ./static-doexec $(nproc)

Even at a very modest scale of 26 cores (ops/s):
before: 133543.63
after: 186061.81 (+39%)

While with the patch these allocations remain a significant problem, the primary bottleneck shifts to page release handling.

Signed-off-by: Mateusz Guzik <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
[Dennis: reflowed 1 line]
Signed-off-by: Dennis Zhou <[email protected]>
1 parent c439d5e commit 14ef95b

File tree

1 file changed

+4
-11
lines changed

1 file changed

+4
-11
lines changed

kernel/fork.c

Lines changed: 4 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -909,8 +909,6 @@ static void cleanup_lazy_tlbs(struct mm_struct *mm)
909909
*/
910910
void __mmdrop(struct mm_struct *mm)
911911
{
912-
int i;
913-
914912
BUG_ON(mm == &init_mm);
915913
WARN_ON_ONCE(mm == current->mm);
916914

@@ -925,9 +923,8 @@ void __mmdrop(struct mm_struct *mm)
925923
put_user_ns(mm->user_ns);
926924
mm_pasid_drop(mm);
927925
mm_destroy_cid(mm);
926+
percpu_counter_destroy_many(mm->rss_stat, NR_MM_COUNTERS);
928927

929-
for (i = 0; i < NR_MM_COUNTERS; i++)
930-
percpu_counter_destroy(&mm->rss_stat[i]);
931928
free_mm(mm);
932929
}
933930
EXPORT_SYMBOL_GPL(__mmdrop);
@@ -1252,8 +1249,6 @@ static void mm_init_uprobes_state(struct mm_struct *mm)
12521249
static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
12531250
struct user_namespace *user_ns)
12541251
{
1255-
int i;
1256-
12571252
mt_init_flags(&mm->mm_mt, MM_MT_FLAGS);
12581253
mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock);
12591254
atomic_set(&mm->mm_users, 1);
@@ -1301,17 +1296,15 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
13011296
if (mm_alloc_cid(mm))
13021297
goto fail_cid;
13031298

1304-
for (i = 0; i < NR_MM_COUNTERS; i++)
1305-
if (percpu_counter_init(&mm->rss_stat[i], 0, GFP_KERNEL_ACCOUNT))
1306-
goto fail_pcpu;
1299+
if (percpu_counter_init_many(mm->rss_stat, 0, GFP_KERNEL_ACCOUNT,
1300+
NR_MM_COUNTERS))
1301+
goto fail_pcpu;
13071302

13081303
mm->user_ns = get_user_ns(user_ns);
13091304
lru_gen_init_mm(mm);
13101305
return mm;
13111306

13121307
fail_pcpu:
1313-
while (i > 0)
1314-
percpu_counter_destroy(&mm->rss_stat[--i]);
13151308
mm_destroy_cid(mm);
13161309
fail_cid:
13171310
destroy_context(mm);

0 commit comments

Comments
 (0)