
Commit e4d7770

rgushchin authored and dennisszhou committed
percpu: optimize locking in pcpu_balance_workfn()
pcpu_balance_workfn() unconditionally calls pcpu_balance_free(), pcpu_reclaim_populated(), pcpu_balance_populated() and pcpu_balance_free() again.

Each call to pcpu_balance_free() and pcpu_reclaim_populated() will cause at least one acquisition of the pcpu_lock. So even if the balancing was scheduled because of a failed atomic allocation, pcpu_lock will be acquired at least 4 times. This obviously increases the contention on the pcpu_lock.

To optimize the scheme let's grab the pcpu_lock on the upper level (in pcpu_balance_workfn()) and keep it generally locked for the whole duration of the scheduled work, but release conditionally to perform any slow operations like chunk (de)population and creation of new chunks.

Signed-off-by: Roman Gushchin <[email protected]>
Signed-off-by: Dennis Zhou <[email protected]>
1 parent 4829c79 commit e4d7770
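For illustration only, here is a minimal userspace sketch of the scheme described in the commit message: the work function takes one coarse lock for the whole pass, and the helpers it calls are entered with that lock held and drop it only around slow operations. The lock is a pthread mutex standing in for pcpu_lock, and every function name in the sketch is a hypothetical stand-in, not a kernel symbol.

/*
 * Minimal sketch of the "lock once, drop around slow work" pattern.
 * NOT the mm/percpu.c code: pthread mutex stands in for pcpu_lock,
 * balance_workfn/balance_free/slow_destroy are illustrative names.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; /* stand-in for pcpu_lock */

static void slow_destroy(void)
{
	/* stand-in for slow work such as depopulating/destroying a chunk */
	puts("doing slow work with the lock dropped");
}

/* Called with 'lock' held; may temporarily drop and re-acquire it. */
static void balance_free(void)
{
	/* fast list manipulation would happen here, under the lock */

	pthread_mutex_unlock(&lock);	/* drop only around the slow part */
	slow_destroy();
	pthread_mutex_lock(&lock);	/* return to the caller with the lock held */
}

static void balance_workfn(void)
{
	pthread_mutex_lock(&lock);	/* one acquisition for the whole pass */
	balance_free();
	/* ...the other balance steps run back to back under the same lock... */
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	balance_workfn();
	return 0;
}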

File tree: 1 file changed, +29 -12 lines changed

mm/percpu.c

Lines changed: 29 additions & 12 deletions
@@ -1980,19 +1980,22 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
  * If empty_only is %false, reclaim all fully free chunks regardless of the
  * number of populated pages. Otherwise, only reclaim chunks that have no
  * populated pages.
+ *
+ * CONTEXT:
+ * pcpu_lock (can be dropped temporarily)
  */
 static void pcpu_balance_free(bool empty_only)
 {
         LIST_HEAD(to_free);
         struct list_head *free_head = &pcpu_chunk_lists[pcpu_free_slot];
         struct pcpu_chunk *chunk, *next;
 
+        lockdep_assert_held(&pcpu_lock);
+
         /*
          * There's no reason to keep around multiple unused chunks and VM
          * areas can be scarce. Destroy all free chunks except for one.
          */
-        spin_lock_irq(&pcpu_lock);
-
         list_for_each_entry_safe(chunk, next, free_head, list) {
                 WARN_ON(chunk->immutable);
 
@@ -2004,8 +2007,10 @@ static void pcpu_balance_free(bool empty_only)
                 list_move(&chunk->list, &to_free);
         }
 
-        spin_unlock_irq(&pcpu_lock);
+        if (list_empty(&to_free))
+                return;
 
+        spin_unlock_irq(&pcpu_lock);
         list_for_each_entry_safe(chunk, next, &to_free, list) {
                 unsigned int rs, re;
 
@@ -2019,6 +2024,7 @@ static void pcpu_balance_free(bool empty_only)
                 pcpu_destroy_chunk(chunk);
                 cond_resched();
         }
+        spin_lock_irq(&pcpu_lock);
 }
 
 /**
@@ -2029,6 +2035,9 @@ static void pcpu_balance_free(bool empty_only)
  * OOM killer to be triggered. We should avoid doing so until an actual
  * allocation causes the failure as it is possible that requests can be
  * serviced from already backed regions.
+ *
+ * CONTEXT:
+ * pcpu_lock (can be dropped temporarily)
  */
 static void pcpu_balance_populated(void)
 {
@@ -2037,6 +2046,8 @@ static void pcpu_balance_populated(void)
         struct pcpu_chunk *chunk;
         int slot, nr_to_pop, ret;
 
+        lockdep_assert_held(&pcpu_lock);
+
         /*
          * Ensure there are certain number of free populated pages for
          * atomic allocs. Fill up from the most packed so that atomic
@@ -2064,13 +2075,11 @@ static void pcpu_balance_populated(void)
                 if (!nr_to_pop)
                         break;
 
-                spin_lock_irq(&pcpu_lock);
                 list_for_each_entry(chunk, &pcpu_chunk_lists[slot], list) {
                         nr_unpop = chunk->nr_pages - chunk->nr_populated;
                         if (nr_unpop)
                                 break;
                 }
-                spin_unlock_irq(&pcpu_lock);
 
                 if (!nr_unpop)
                         continue;
@@ -2080,12 +2089,13 @@ static void pcpu_balance_populated(void)
                                              chunk->nr_pages) {
                         int nr = min_t(int, re - rs, nr_to_pop);
 
+                        spin_unlock_irq(&pcpu_lock);
                         ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
+                        cond_resched();
+                        spin_lock_irq(&pcpu_lock);
                         if (!ret) {
                                 nr_to_pop -= nr;
-                                spin_lock_irq(&pcpu_lock);
                                 pcpu_chunk_populated(chunk, rs, rs + nr);
-                                spin_unlock_irq(&pcpu_lock);
                         } else {
                                 nr_to_pop = 0;
                         }
@@ -2097,11 +2107,12 @@ static void pcpu_balance_populated(void)
 
         if (nr_to_pop) {
                 /* ran out of chunks to populate, create a new one and retry */
+                spin_unlock_irq(&pcpu_lock);
                 chunk = pcpu_create_chunk(gfp);
+                cond_resched();
+                spin_lock_irq(&pcpu_lock);
                 if (chunk) {
-                        spin_lock_irq(&pcpu_lock);
                         pcpu_chunk_relocate(chunk, -1);
-                        spin_unlock_irq(&pcpu_lock);
                         goto retry_pop;
                 }
         }
@@ -2117,14 +2128,18 @@ static void pcpu_balance_populated(void)
  * populated pages threshold, reintegrate the chunk if it has empty free pages.
  * Each chunk is scanned in the reverse order to keep populated pages close to
  * the beginning of the chunk.
+ *
+ * CONTEXT:
+ * pcpu_lock (can be dropped temporarily)
+ *
  */
 static void pcpu_reclaim_populated(void)
 {
         struct pcpu_chunk *chunk;
         struct pcpu_block_md *block;
         int i, end;
 
-        spin_lock_irq(&pcpu_lock);
+        lockdep_assert_held(&pcpu_lock);
 
 restart:
         /*
@@ -2190,8 +2205,6 @@ static void pcpu_reclaim_populated(void)
                         list_move(&chunk->list,
                                   &pcpu_chunk_lists[pcpu_sidelined_slot]);
         }
-
-        spin_unlock_irq(&pcpu_lock);
 }
 
 /**
@@ -2212,10 +2225,14 @@ static void pcpu_balance_workfn(struct work_struct *work)
          * appropriate.
          */
         mutex_lock(&pcpu_alloc_mutex);
+        spin_lock_irq(&pcpu_lock);
+
         pcpu_balance_free(false);
         pcpu_reclaim_populated();
         pcpu_balance_populated();
         pcpu_balance_free(true);
+
+        spin_unlock_irq(&pcpu_lock);
         mutex_unlock(&pcpu_alloc_mutex);
 }
 
0 commit comments
