
Commit b01b214

Author: Ingo Molnar (committed)
mm/swap: Use local_lock for protection
The various struct pagevec per CPU variables are protected by disabling either preemption or interrupts across the critical sections. Inside these sections spinlocks have to be acquired.

These spinlocks are regular spinlock_t types which are converted to "sleeping" spinlocks on PREEMPT_RT enabled kernels. Obviously sleeping locks cannot be acquired in preemption or interrupt disabled sections.

local locks provide a trivial way to substitute preempt and interrupt disable instances. On a non PREEMPT_RT enabled kernel local_lock() maps to preempt_disable() and local_lock_irq() to local_irq_disable().

Create lru_rotate_pvecs containing the pagevec and the locallock. Create lru_pvecs containing the remaining pagevecs and the locallock. Add lru_add_drain_cpu_zone() which is used from compact_zone() to avoid exporting the pvec structure.

Change the relevant call sites to acquire these locks instead of using preempt_disable() / get_cpu() / get_cpu_var() and local_irq_disable() / local_irq_save().

There is neither a functional change nor a change in the generated binary code for non PREEMPT_RT enabled non-debug kernels.

When lockdep is enabled, local locks have lockdep maps embedded. These allow lockdep to validate the protections, i.e. inappropriate usage of a preemption-only protected section would result in a lockdep warning, while the same problem would not be noticed with a plain preempt_disable() based protection.

local locks also improve readability as they provide a named scope for the protections, while preempt/interrupt disable are opaque and scopeless.

Finally, local locks allow PREEMPT_RT to substitute them with real locking primitives to ensure the correctness of operation in a fully preemptible kernel.

[ bigeasy: Adopted to use local_lock ]

Signed-off-by: Ingo Molnar <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
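For readers unfamiliar with the local_lock API this commit adopts, the following is a minimal illustrative sketch of the pattern, not part of the patch; the struct, variable and function names (my_pcp_cache, my_cache, my_cache_set, my_cache_set_irqsafe) are hypothetical.

#include <linux/local_lock.h>
#include <linux/percpu.h>

/*
 * Hypothetical per-CPU cache guarded by a local_lock_t instead of bare
 * preempt_disable()/get_cpu_var(). On a non PREEMPT_RT kernel local_lock()
 * compiles down to preempt_disable(); on PREEMPT_RT it becomes a real
 * per-CPU lock, and with lockdep enabled the embedded lockdep map lets
 * lockdep validate the protection.
 */
struct my_pcp_cache {
	local_lock_t lock;
	int value;
};

static DEFINE_PER_CPU(struct my_pcp_cache, my_cache) = {
	.lock = INIT_LOCAL_LOCK(lock),
};

/* Preemption-only protection, mirroring lru_pvecs.lock in this patch. */
static void my_cache_set(int v)
{
	local_lock(&my_cache.lock);		/* was: preempt_disable() / get_cpu_var() */
	this_cpu_ptr(&my_cache)->value = v;
	local_unlock(&my_cache.lock);		/* was: preempt_enable() / put_cpu_var() */
}

/* Interrupt-safe protection, mirroring lru_rotate.lock in this patch. */
static void my_cache_set_irqsafe(int v)
{
	unsigned long flags;

	local_lock_irqsave(&my_cache.lock, flags);	/* was: local_irq_save(flags) */
	this_cpu_ptr(&my_cache)->value = v;
	local_unlock_irqrestore(&my_cache.lock, flags);	/* was: local_irq_restore(flags) */
}

The named lock gives the critical section an explicit scope that lockdep can check, which a plain preempt_disable()/local_irq_save() pair cannot provide.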
1 parent cfa6705 commit b01b214

3 files changed: +82 -43 lines changed

include/linux/swap.h

Lines changed: 1 addition & 0 deletions
@@ -337,6 +337,7 @@ extern void activate_page(struct page *);
 extern void mark_page_accessed(struct page *);
 extern void lru_add_drain(void);
 extern void lru_add_drain_cpu(int cpu);
+extern void lru_add_drain_cpu_zone(struct zone *zone);
 extern void lru_add_drain_all(void);
 extern void rotate_reclaimable_page(struct page *page);
 extern void deactivate_file_page(struct page *page);

mm/compaction.c

Lines changed: 1 addition & 5 deletions
@@ -2243,15 +2243,11 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
 		 * would succeed.
 		 */
 		if (cc->order > 0 && last_migrated_pfn) {
-			int cpu;
 			unsigned long current_block_start =
 				block_start_pfn(cc->migrate_pfn, cc->order);
 
 			if (last_migrated_pfn < current_block_start) {
-				cpu = get_cpu();
-				lru_add_drain_cpu(cpu);
-				drain_local_pages(cc->zone);
-				put_cpu();
+				lru_add_drain_cpu_zone(cc->zone);
 				/* No more flushing until we migrate again */
 				last_migrated_pfn = 0;
 			}

mm/swap.c

Lines changed: 80 additions & 38 deletions
@@ -35,6 +35,7 @@
 #include <linux/uio.h>
 #include <linux/hugetlb.h>
 #include <linux/page_idle.h>
+#include <linux/local_lock.h>
 
 #include "internal.h"
 
@@ -44,14 +45,32 @@
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
 
-static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
-static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
-static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
-static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
-static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs);
+/* Protecting only lru_rotate.pvec which requires disabling interrupts */
+struct lru_rotate {
+	local_lock_t lock;
+	struct pagevec pvec;
+};
+static DEFINE_PER_CPU(struct lru_rotate, lru_rotate) = {
+	.lock = INIT_LOCAL_LOCK(lock),
+};
+
+/*
+ * The following struct pagevec are grouped together because they are protected
+ * by disabling preemption (and interrupts remain enabled).
+ */
+struct lru_pvecs {
+	local_lock_t lock;
+	struct pagevec lru_add;
+	struct pagevec lru_deactivate_file;
+	struct pagevec lru_deactivate;
+	struct pagevec lru_lazyfree;
 #ifdef CONFIG_SMP
-static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
+	struct pagevec activate_page;
 #endif
+};
+static DEFINE_PER_CPU(struct lru_pvecs, lru_pvecs) = {
+	.lock = INIT_LOCAL_LOCK(lock),
+};
 
 /*
  * This path almost never happens for VM activity - pages are normally
@@ -254,11 +273,11 @@ void rotate_reclaimable_page(struct page *page)
 		unsigned long flags;
 
 		get_page(page);
-		local_irq_save(flags);
-		pvec = this_cpu_ptr(&lru_rotate_pvecs);
+		local_lock_irqsave(&lru_rotate.lock, flags);
+		pvec = this_cpu_ptr(&lru_rotate.pvec);
 		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_move_tail(pvec);
-		local_irq_restore(flags);
+		local_unlock_irqrestore(&lru_rotate.lock, flags);
 	}
 }
 
@@ -293,27 +312,29 @@ static void __activate_page(struct page *page, struct lruvec *lruvec,
 #ifdef CONFIG_SMP
 static void activate_page_drain(int cpu)
 {
-	struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);
+	struct pagevec *pvec = &per_cpu(lru_pvecs.activate_page, cpu);
 
 	if (pagevec_count(pvec))
 		pagevec_lru_move_fn(pvec, __activate_page, NULL);
 }
 
 static bool need_activate_page_drain(int cpu)
 {
-	return pagevec_count(&per_cpu(activate_page_pvecs, cpu)) != 0;
+	return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0;
 }
 
 void activate_page(struct page *page)
 {
 	page = compound_head(page);
 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
-		struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
+		struct pagevec *pvec;
 
+		local_lock(&lru_pvecs.lock);
+		pvec = this_cpu_ptr(&lru_pvecs.activate_page);
 		get_page(page);
 		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_lru_move_fn(pvec, __activate_page, NULL);
-		put_cpu_var(activate_page_pvecs);
+		local_unlock(&lru_pvecs.lock);
 	}
 }
 
@@ -335,9 +356,12 @@ void activate_page(struct page *page)
 
 static void __lru_cache_activate_page(struct page *page)
 {
-	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
+	struct pagevec *pvec;
 	int i;
 
+	local_lock(&lru_pvecs.lock);
+	pvec = this_cpu_ptr(&lru_pvecs.lru_add);
+
 	/*
 	 * Search backwards on the optimistic assumption that the page being
 	 * activated has just been added to this pagevec. Note that only
@@ -357,7 +381,7 @@ static void __lru_cache_activate_page(struct page *page)
 		}
 	}
 
-	put_cpu_var(lru_add_pvec);
+	local_unlock(&lru_pvecs.lock);
 }
 
 /*
@@ -385,7 +409,7 @@ void mark_page_accessed(struct page *page)
 	} else if (!PageActive(page)) {
 		/*
 		 * If the page is on the LRU, queue it for activation via
-		 * activate_page_pvecs. Otherwise, assume the page is on a
+		 * lru_pvecs.activate_page. Otherwise, assume the page is on a
 		 * pagevec, mark it active and it'll be moved to the active
 		 * LRU on the next drain.
 		 */
@@ -404,12 +428,14 @@ EXPORT_SYMBOL(mark_page_accessed);
 
 static void __lru_cache_add(struct page *page)
 {
-	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
+	struct pagevec *pvec;
 
+	local_lock(&lru_pvecs.lock);
+	pvec = this_cpu_ptr(&lru_pvecs.lru_add);
 	get_page(page);
 	if (!pagevec_add(pvec, page) || PageCompound(page))
 		__pagevec_lru_add(pvec);
-	put_cpu_var(lru_add_pvec);
+	local_unlock(&lru_pvecs.lock);
 }
 
 /**
@@ -593,30 +619,30 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
  */
 void lru_add_drain_cpu(int cpu)
 {
-	struct pagevec *pvec = &per_cpu(lru_add_pvec, cpu);
+	struct pagevec *pvec = &per_cpu(lru_pvecs.lru_add, cpu);
 
 	if (pagevec_count(pvec))
 		__pagevec_lru_add(pvec);
 
-	pvec = &per_cpu(lru_rotate_pvecs, cpu);
+	pvec = &per_cpu(lru_rotate.pvec, cpu);
 	if (pagevec_count(pvec)) {
 		unsigned long flags;
 
 		/* No harm done if a racing interrupt already did this */
-		local_irq_save(flags);
+		local_lock_irqsave(&lru_rotate.lock, flags);
 		pagevec_move_tail(pvec);
-		local_irq_restore(flags);
+		local_unlock_irqrestore(&lru_rotate.lock, flags);
 	}
 
-	pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
+	pvec = &per_cpu(lru_pvecs.lru_deactivate_file, cpu);
 	if (pagevec_count(pvec))
 		pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
 
-	pvec = &per_cpu(lru_deactivate_pvecs, cpu);
+	pvec = &per_cpu(lru_pvecs.lru_deactivate, cpu);
 	if (pagevec_count(pvec))
 		pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
 
-	pvec = &per_cpu(lru_lazyfree_pvecs, cpu);
+	pvec = &per_cpu(lru_pvecs.lru_lazyfree, cpu);
 	if (pagevec_count(pvec))
 		pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
 
@@ -641,11 +667,14 @@ void deactivate_file_page(struct page *page)
 		return;
 
 	if (likely(get_page_unless_zero(page))) {
-		struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
+		struct pagevec *pvec;
+
+		local_lock(&lru_pvecs.lock);
+		pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate_file);
 
 		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
-		put_cpu_var(lru_deactivate_file_pvecs);
+		local_unlock(&lru_pvecs.lock);
 	}
 }
 
@@ -660,12 +689,14 @@ void deactivate_file_page(struct page *page)
 void deactivate_page(struct page *page)
 {
 	if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
-		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
+		struct pagevec *pvec;
 
+		local_lock(&lru_pvecs.lock);
+		pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate);
 		get_page(page);
 		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
-		put_cpu_var(lru_deactivate_pvecs);
+		local_unlock(&lru_pvecs.lock);
 	}
 }
 
@@ -680,19 +711,30 @@ void mark_page_lazyfree(struct page *page)
 {
 	if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
 	    !PageSwapCache(page) && !PageUnevictable(page)) {
-		struct pagevec *pvec = &get_cpu_var(lru_lazyfree_pvecs);
+		struct pagevec *pvec;
 
+		local_lock(&lru_pvecs.lock);
+		pvec = this_cpu_ptr(&lru_pvecs.lru_lazyfree);
 		get_page(page);
 		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
-		put_cpu_var(lru_lazyfree_pvecs);
+		local_unlock(&lru_pvecs.lock);
 	}
 }
 
 void lru_add_drain(void)
 {
-	lru_add_drain_cpu(get_cpu());
-	put_cpu();
+	local_lock(&lru_pvecs.lock);
+	lru_add_drain_cpu(smp_processor_id());
+	local_unlock(&lru_pvecs.lock);
+}
+
+void lru_add_drain_cpu_zone(struct zone *zone)
+{
+	local_lock(&lru_pvecs.lock);
+	lru_add_drain_cpu(smp_processor_id());
+	drain_local_pages(zone);
+	local_unlock(&lru_pvecs.lock);
 }
 
 #ifdef CONFIG_SMP
@@ -743,11 +785,11 @@ void lru_add_drain_all(void)
 	for_each_online_cpu(cpu) {
 		struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
 
-		if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
-		    pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
-		    pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
-		    pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
-		    pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) ||
+		if (pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) ||
+		    pagevec_count(&per_cpu(lru_rotate.pvec, cpu)) ||
+		    pagevec_count(&per_cpu(lru_pvecs.lru_deactivate_file, cpu)) ||
+		    pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) ||
+		    pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) ||
 		    need_activate_page_drain(cpu)) {
 			INIT_WORK(work, lru_add_drain_per_cpu);
 			queue_work_on(cpu, mm_percpu_wq, work);
