
Commit 9917ff5

Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
 "5 patches.

  Subsystems affected by this patch series: binfmt, procfs, and mm
  (vmscan, memcg, and kfence)"

* emailed patches from Andrew Morton <[email protected]>:
  kfence: make test case compatible with run time set sample interval
  mm: memcg: synchronize objcg lists with a dedicated spinlock
  mm: vmscan: remove deadlock due to throttling failing to make progress
  fs/proc: task_mmu.c: don't read mapcount for migration entry
  fs/binfmt_elf: fix PT_LOAD p_align values for loaders
2 parents 83e3966 + 8913c61 commit 9917ff5

8 files changed, 51 insertions(+), 23 deletions(-)


fs/binfmt_elf.c

Lines changed: 1 addition & 1 deletion
@@ -1117,7 +1117,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
 			 * without MAP_FIXED nor MAP_FIXED_NOREPLACE).
 			 */
 			alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
-			if (alignment > ELF_MIN_ALIGN) {
+			if (interpreter || alignment > ELF_MIN_ALIGN) {
 				load_bias = ELF_ET_DYN_BASE;
 				if (current->flags & PF_RANDOMIZE)
 					load_bias += arch_mmap_rnd();
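
For context only (not part of the commit): the condition above keys off the largest power-of-two PT_LOAD p_align in the binary, which is what the kernel's maximum_alignment() computes. A rough userspace sketch of the same computation, handy when inspecting a binary affected by this change, might look as follows; it handles 64-bit ELF only and keeps error handling deliberately minimal.

/* Illustrative sketch: print the maximum power-of-two PT_LOAD p_align of a
 * 64-bit ELF file, roughly mirroring maximum_alignment() in fs/binfmt_elf.c. */
#include <elf.h>
#include <stdio.h>

int main(int argc, char **argv)
{
	FILE *f;
	Elf64_Ehdr eh;
	Elf64_Phdr ph;
	unsigned long align = 0;
	int i;

	if (argc < 2 || !(f = fopen(argv[1], "rb")))
		return 1;
	if (fread(&eh, sizeof(eh), 1, f) != 1)
		return 1;
	for (i = 0; i < eh.e_phnum; i++) {
		if (fseek(f, eh.e_phoff + (long)i * eh.e_phentsize, SEEK_SET) ||
		    fread(&ph, sizeof(ph), 1, f) != 1)
			return 1;
		/* Skip non-PT_LOAD segments and non-power-of-two alignments,
		 * as the kernel does. */
		if (ph.p_type != PT_LOAD || !ph.p_align ||
		    (ph.p_align & (ph.p_align - 1)))
			continue;
		if (ph.p_align > align)
			align = ph.p_align;
	}
	printf("max PT_LOAD p_align: 0x%lx\n", align);
	fclose(f);
	return 0;
}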

fs/proc/task_mmu.c

Lines changed: 31 additions & 9 deletions
@@ -440,7 +440,8 @@ static void smaps_page_accumulate(struct mem_size_stats *mss,
 }
 
 static void smaps_account(struct mem_size_stats *mss, struct page *page,
-		bool compound, bool young, bool dirty, bool locked)
+		bool compound, bool young, bool dirty, bool locked,
+		bool migration)
 {
 	int i, nr = compound ? compound_nr(page) : 1;
 	unsigned long size = nr * PAGE_SIZE;
@@ -467,8 +468,15 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
 	 * page_count(page) == 1 guarantees the page is mapped exactly once.
 	 * If any subpage of the compound page mapped with PTE it would elevate
 	 * page_count().
+	 *
+	 * The page_mapcount() is called to get a snapshot of the mapcount.
+	 * Without holding the page lock this snapshot can be slightly wrong as
+	 * we cannot always read the mapcount atomically. It is not safe to
+	 * call page_mapcount() even with PTL held if the page is not mapped,
+	 * especially for migration entries. Treat regular migration entries
+	 * as mapcount == 1.
 	 */
-	if (page_count(page) == 1) {
+	if ((page_count(page) == 1) || migration) {
 		smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty,
 			locked, true);
 		return;
@@ -517,6 +525,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
 	struct vm_area_struct *vma = walk->vma;
 	bool locked = !!(vma->vm_flags & VM_LOCKED);
 	struct page *page = NULL;
+	bool migration = false;
 
 	if (pte_present(*pte)) {
 		page = vm_normal_page(vma, addr, *pte);
@@ -536,8 +545,11 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
 			} else {
 				mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
 			}
-		} else if (is_pfn_swap_entry(swpent))
+		} else if (is_pfn_swap_entry(swpent)) {
+			if (is_migration_entry(swpent))
+				migration = true;
 			page = pfn_swap_entry_to_page(swpent);
+		}
 	} else {
 		smaps_pte_hole_lookup(addr, walk);
 		return;
@@ -546,7 +558,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
 	if (!page)
 		return;
 
-	smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked);
+	smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte),
+		      locked, migration);
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -557,15 +570,18 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
 	struct vm_area_struct *vma = walk->vma;
 	bool locked = !!(vma->vm_flags & VM_LOCKED);
 	struct page *page = NULL;
+	bool migration = false;
 
 	if (pmd_present(*pmd)) {
 		/* FOLL_DUMP will return -EFAULT on huge zero page */
 		page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
 	} else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
 		swp_entry_t entry = pmd_to_swp_entry(*pmd);
 
-		if (is_migration_entry(entry))
+		if (is_migration_entry(entry)) {
+			migration = true;
 			page = pfn_swap_entry_to_page(entry);
+		}
 	}
 	if (IS_ERR_OR_NULL(page))
 		return;
@@ -577,7 +593,9 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
 		/* pass */;
 	else
 		mss->file_thp += HPAGE_PMD_SIZE;
-	smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
+
+	smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd),
+		      locked, migration);
 }
 #else
 static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
@@ -1378,6 +1396,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
 {
 	u64 frame = 0, flags = 0;
 	struct page *page = NULL;
+	bool migration = false;
 
 	if (pte_present(pte)) {
 		if (pm->show_pfn)
@@ -1399,13 +1418,14 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
 		frame = swp_type(entry) |
 			(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
 		flags |= PM_SWAP;
+		migration = is_migration_entry(entry);
 		if (is_pfn_swap_entry(entry))
 			page = pfn_swap_entry_to_page(entry);
 	}
 
 	if (page && !PageAnon(page))
 		flags |= PM_FILE;
-	if (page && page_mapcount(page) == 1)
+	if (page && !migration && page_mapcount(page) == 1)
 		flags |= PM_MMAP_EXCLUSIVE;
 	if (vma->vm_flags & VM_SOFTDIRTY)
 		flags |= PM_SOFT_DIRTY;
@@ -1421,8 +1441,9 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
 	spinlock_t *ptl;
 	pte_t *pte, *orig_pte;
 	int err = 0;
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	bool migration = false;
+
 	ptl = pmd_trans_huge_lock(pmdp, vma);
 	if (ptl) {
 		u64 flags = 0, frame = 0;
@@ -1461,11 +1482,12 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
 			if (pmd_swp_uffd_wp(pmd))
 				flags |= PM_UFFD_WP;
 			VM_BUG_ON(!is_pmd_migration_entry(pmd));
+			migration = is_migration_entry(entry);
 			page = pfn_swap_entry_to_page(entry);
 		}
 #endif
 
-		if (page && page_mapcount(page) == 1)
+		if (page && !migration && page_mapcount(page) == 1)
 			flags |= PM_MMAP_EXCLUSIVE;
 
 		for (; addr != end; addr += PAGE_SIZE) {
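
For illustration only (not part of the commit): the PM_MMAP_EXCLUSIVE flag whose handling changes above is reported to userspace as bit 56 of each 64-bit /proc/pid/pagemap entry (see Documentation/admin-guide/mm/pagemap.rst). A minimal userspace sketch that reads the entry for one mapped page, just to make the affected bit concrete; the buffer and output format are illustrative.

/* Illustrative sketch: read the /proc/self/pagemap entry for one page and
 * report the "exclusively mapped" bit (bit 56), which this patch no longer
 * sets for pages referenced only via migration entries. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	long psize = sysconf(_SC_PAGESIZE);
	char *buf = malloc(psize);		/* any mapped page will do */
	uint64_t entry;
	int fd;

	if (!buf)
		return 1;
	buf[0] = 1;				/* fault the page in */

	fd = open("/proc/self/pagemap", O_RDONLY);
	if (fd < 0)
		return 1;
	if (pread(fd, &entry, sizeof(entry),
		  ((uintptr_t)buf / psize) * sizeof(entry)) != sizeof(entry))
		return 1;

	printf("present=%d swapped=%d exclusive=%d\n",
	       (int)(entry >> 63 & 1),		/* bit 63: page present */
	       (int)(entry >> 62 & 1),		/* bit 62: swapped */
	       (int)(entry >> 56 & 1));		/* bit 56: exclusively mapped */
	close(fd);
	return 0;
}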

include/linux/kfence.h

Lines changed: 2 additions & 0 deletions
@@ -17,6 +17,8 @@
 #include <linux/atomic.h>
 #include <linux/static_key.h>
 
+extern unsigned long kfence_sample_interval;
+
 /*
  * We allocate an even number of pages, as it simplifies calculations to map
  * address to metadata indices; effectively, the very first page serves as an

include/linux/memcontrol.h

Lines changed: 3 additions & 2 deletions
@@ -219,7 +219,7 @@ struct obj_cgroup {
 	struct mem_cgroup *memcg;
 	atomic_t nr_charged_bytes;
 	union {
-		struct list_head list;
+		struct list_head list; /* protected by objcg_lock */
 		struct rcu_head rcu;
 	};
 };
@@ -315,7 +315,8 @@ struct mem_cgroup {
 #ifdef CONFIG_MEMCG_KMEM
 	int kmemcg_id;
 	struct obj_cgroup __rcu *objcg;
-	struct list_head objcg_list; /* list of inherited objcgs */
+	/* list of inherited objcgs, protected by objcg_lock */
+	struct list_head objcg_list;
 #endif
 
 	MEMCG_PADDING(_pad2_);

mm/kfence/core.c

Lines changed: 2 additions & 1 deletion
@@ -47,7 +47,8 @@
 
 static bool kfence_enabled __read_mostly;
 
-static unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL;
+unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL;
+EXPORT_SYMBOL_GPL(kfence_sample_interval); /* Export for test modules. */
 
 #ifdef MODULE_PARAM_PREFIX
 #undef MODULE_PARAM_PREFIX

mm/kfence/kfence_test.c

Lines changed: 4 additions & 4 deletions
@@ -268,13 +268,13 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat
 	 * 100x the sample interval should be more than enough to ensure we get
 	 * a KFENCE allocation eventually.
 	 */
-	timeout = jiffies + msecs_to_jiffies(100 * CONFIG_KFENCE_SAMPLE_INTERVAL);
+	timeout = jiffies + msecs_to_jiffies(100 * kfence_sample_interval);
 	/*
 	 * Especially for non-preemption kernels, ensure the allocation-gate
 	 * timer can catch up: after @resched_after, every failed allocation
 	 * attempt yields, to ensure the allocation-gate timer is scheduled.
 	 */
-	resched_after = jiffies + msecs_to_jiffies(CONFIG_KFENCE_SAMPLE_INTERVAL);
+	resched_after = jiffies + msecs_to_jiffies(kfence_sample_interval);
 	do {
 		if (test_cache)
 			alloc = kmem_cache_alloc(test_cache, gfp);
@@ -608,7 +608,7 @@ static void test_gfpzero(struct kunit *test)
 	int i;
 
 	/* Skip if we think it'd take too long. */
-	KFENCE_TEST_REQUIRES(test, CONFIG_KFENCE_SAMPLE_INTERVAL <= 100);
+	KFENCE_TEST_REQUIRES(test, kfence_sample_interval <= 100);
 
 	setup_test_cache(test, size, 0, NULL);
 	buf1 = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
@@ -739,7 +739,7 @@ static void test_memcache_alloc_bulk(struct kunit *test)
 	 * 100x the sample interval should be more than enough to ensure we get
 	 * a KFENCE allocation eventually.
 	 */
-	timeout = jiffies + msecs_to_jiffies(100 * CONFIG_KFENCE_SAMPLE_INTERVAL);
+	timeout = jiffies + msecs_to_jiffies(100 * kfence_sample_interval);
 	do {
 		void *objects[100];
 		int i, num = kmem_cache_alloc_bulk(test_cache, GFP_ATOMIC, ARRAY_SIZE(objects),
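
For context only (not part of the commit): because kfence_sample_interval is now exported (GPL-only) and declared in <linux/kfence.h>, an out-of-tree test module can scale its timeouts by the interval the kernel is actually running with, rather than by CONFIG_KFENCE_SAMPLE_INTERVAL. A minimal, hypothetical module sketch; the module name and message are made up for illustration.

/* Hypothetical out-of-tree module: scale a wait by the runtime KFENCE
 * sample interval now exported from mm/kfence/core.c. */
#include <linux/jiffies.h>
#include <linux/kfence.h>
#include <linux/module.h>

static int __init sample_interval_demo_init(void)
{
	unsigned long timeout = jiffies +
		msecs_to_jiffies(100 * kfence_sample_interval);

	pr_info("kfence sample interval: %lu ms, waiting until jiffies=%lu\n",
		kfence_sample_interval, timeout);
	return 0;
}

static void __exit sample_interval_demo_exit(void)
{
}

module_init(sample_interval_demo_init);
module_exit(sample_interval_demo_exit);
MODULE_LICENSE("GPL");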

mm/memcontrol.c

Lines changed: 5 additions & 5 deletions
@@ -254,7 +254,7 @@ struct mem_cgroup *vmpressure_to_memcg(struct vmpressure *vmpr)
 }
 
 #ifdef CONFIG_MEMCG_KMEM
-extern spinlock_t css_set_lock;
+static DEFINE_SPINLOCK(objcg_lock);
 
 bool mem_cgroup_kmem_disabled(void)
 {
@@ -298,9 +298,9 @@ static void obj_cgroup_release(struct percpu_ref *ref)
 	if (nr_pages)
 		obj_cgroup_uncharge_pages(objcg, nr_pages);
 
-	spin_lock_irqsave(&css_set_lock, flags);
+	spin_lock_irqsave(&objcg_lock, flags);
 	list_del(&objcg->list);
-	spin_unlock_irqrestore(&css_set_lock, flags);
+	spin_unlock_irqrestore(&objcg_lock, flags);
 
 	percpu_ref_exit(ref);
 	kfree_rcu(objcg, rcu);
@@ -332,7 +332,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
 
 	objcg = rcu_replace_pointer(memcg->objcg, NULL, true);
 
-	spin_lock_irq(&css_set_lock);
+	spin_lock_irq(&objcg_lock);
 
 	/* 1) Ready to reparent active objcg. */
 	list_add(&objcg->list, &memcg->objcg_list);
@@ -342,7 +342,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
 	/* 3) Move already reparented objcgs to the parent's list */
 	list_splice(&memcg->objcg_list, &parent->objcg_list);
 
-	spin_unlock_irq(&css_set_lock);
+	spin_unlock_irq(&objcg_lock);
 
 	percpu_ref_kill(&objcg->refcnt);
 }
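
For context only (not part of the commit): the fix above drops the dependency on cgroup's css_set_lock in favor of a file-local lock dedicated to the objcg lists. A generic sketch of that pattern with made-up names; the irqsave variant mirrors obj_cgroup_release(), which may run with IRQs already disabled.

/* Generic sketch: a file-local spinlock dedicated to one list, taken with
 * IRQs disabled on both the add and the delete paths. Names are illustrative. */
#include <linux/list.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);
static LIST_HEAD(demo_list);

struct demo_item {
	struct list_head list;	/* protected by demo_lock */
};

static void demo_add(struct demo_item *item)
{
	spin_lock_irq(&demo_lock);
	list_add(&item->list, &demo_list);
	spin_unlock_irq(&demo_lock);
}

static void demo_del(struct demo_item *item)
{
	unsigned long flags;

	spin_lock_irqsave(&demo_lock, flags);
	list_del(&item->list);
	spin_unlock_irqrestore(&demo_lock, flags);
}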

mm/vmscan.c

Lines changed: 3 additions & 1 deletion
@@ -1066,8 +1066,10 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason)
 	 * forward progress (e.g. journalling workqueues or kthreads).
 	 */
 	if (!current_is_kswapd() &&
-	    current->flags & (PF_IO_WORKER|PF_KTHREAD))
+	    current->flags & (PF_IO_WORKER|PF_KTHREAD)) {
+		cond_resched();
 		return;
+	}
 
 	/*
 	 * These figures are pulled out of thin air.
