
Commit ed3bad2

Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
 "10 patches.

  Subsystems affected by this patch series: MAINTAINERS and mm (shmem,
  pagealloc, tracing, memcg, memory-failure, vmscan, kfence, and
  hugetlb)"

* emailed patches from Andrew Morton <[email protected]>:
  hugetlb: don't pass page cache pages to restore_reserve_on_error
  kfence: fix is_kfence_address() for addresses below KFENCE_POOL_SIZE
  mm: vmscan: fix missing psi annotation for node_reclaim()
  mm/hwpoison: retry with shake_page() for unhandlable pages
  mm: memcontrol: fix occasional OOMs due to proportional memory.low reclaim
  MAINTAINERS: update ClangBuiltLinux IRC chat
  mmflags.h: add missing __GFP_ZEROTAGS and __GFP_SKIP_KASAN_POISON names
  mm/page_alloc: don't corrupt pcppage_migratetype
  Revert "mm: swap: check if swap backing device is congested or not"
  Revert "mm/shmem: fix shmem_swapin() race with swapoff"
2 parents 8ba9fbe + c7b1850 commit ed3bad2

10 files changed, +81 -68 lines changed

MAINTAINERS

Lines changed: 1 addition & 1 deletion
@@ -4508,7 +4508,7 @@ L: [email protected]
 S: Supported
 W: https://clangbuiltlinux.github.io/
 B: https://github.com/ClangBuiltLinux/linux/issues
-C: irc://chat.freenode.net/clangbuiltlinux
+C: irc://irc.libera.chat/clangbuiltlinux
 F: Documentation/kbuild/llvm.rst
 F: include/linux/compiler-clang.h
 F: scripts/clang-tools/

include/linux/kfence.h

Lines changed: 4 additions & 3 deletions
@@ -51,10 +51,11 @@ extern atomic_t kfence_allocation_gate;
 static __always_inline bool is_kfence_address(const void *addr)
 {
 	/*
-	 * The non-NULL check is required in case the __kfence_pool pointer was
-	 * never initialized; keep it in the slow-path after the range-check.
+	 * The __kfence_pool != NULL check is required to deal with the case
+	 * where __kfence_pool == NULL && addr < KFENCE_POOL_SIZE. Keep it in
+	 * the slow-path after the range-check!
 	 */
-	return unlikely((unsigned long)((char *)addr - __kfence_pool) < KFENCE_POOL_SIZE && addr);
+	return unlikely((unsigned long)((char *)addr - __kfence_pool) < KFENCE_POOL_SIZE && __kfence_pool);
 }

 /**
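Why the old check misfires: with __kfence_pool still NULL, the subtraction leaves addr numerically unchanged, so any non-NULL pointer whose value is below KFENCE_POOL_SIZE passed the range check. A minimal userspace sketch of that arithmetic (pool size and test address are made up; the kernel relies on the same wraparound behavior):

#include <stdbool.h>
#include <stdio.h>

#define KFENCE_POOL_SIZE (2UL * 1024 * 1024)	/* hypothetical 2 MiB pool */

static char *__kfence_pool;	/* NULL until the pool is initialized */

/* Old check: "&& addr" only rejects NULL, not low non-NULL addresses. */
static bool is_kfence_address_old(const void *addr)
{
	return (unsigned long)((char *)addr - __kfence_pool) < KFENCE_POOL_SIZE && addr;
}

/* Fixed check: an uninitialized pool rejects every address. */
static bool is_kfence_address_new(const void *addr)
{
	return (unsigned long)((char *)addr - __kfence_pool) < KFENCE_POOL_SIZE && __kfence_pool;
}

int main(void)
{
	const void *addr = (const void *)0x1000;	/* below KFENCE_POOL_SIZE */

	/* With __kfence_pool == NULL, addr - NULL == 0x1000 < pool size:
	 * the old check wrongly returns true, the fixed one false. */
	printf("old: %d, new: %d\n", is_kfence_address_old(addr),
	       is_kfence_address_new(addr));
	return 0;
}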

include/linux/memcontrol.h

Lines changed: 15 additions & 14 deletions
@@ -612,12 +612,15 @@ static inline bool mem_cgroup_disabled(void)
 	return !cgroup_subsys_enabled(memory_cgrp_subsys);
 }

-static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root,
-						  struct mem_cgroup *memcg,
-						  bool in_low_reclaim)
+static inline void mem_cgroup_protection(struct mem_cgroup *root,
+					 struct mem_cgroup *memcg,
+					 unsigned long *min,
+					 unsigned long *low)
 {
+	*min = *low = 0;
+
 	if (mem_cgroup_disabled())
-		return 0;
+		return;

 	/*
 	 * There is no reclaim protection applied to a targeted reclaim.
@@ -653,13 +656,10 @@ static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root,
 	 *
 	 */
 	if (root == memcg)
-		return 0;
-
-	if (in_low_reclaim)
-		return READ_ONCE(memcg->memory.emin);
+		return;

-	return max(READ_ONCE(memcg->memory.emin),
-		   READ_ONCE(memcg->memory.elow));
+	*min = READ_ONCE(memcg->memory.emin);
+	*low = READ_ONCE(memcg->memory.elow);
 }

 void mem_cgroup_calculate_protection(struct mem_cgroup *root,
@@ -1147,11 +1147,12 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
 {
 }

-static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root,
-						  struct mem_cgroup *memcg,
-						  bool in_low_reclaim)
+static inline void mem_cgroup_protection(struct mem_cgroup *root,
+					 struct mem_cgroup *memcg,
+					 unsigned long *min,
+					 unsigned long *low)
 {
-	return 0;
+	*min = *low = 0;
 }

 static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root,
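A caller-side sketch of the new out-parameter contract, modeled on the mm/vmscan.c hunk further below (the struct and values are illustrative stand-ins, not the kernel types):

#include <stdio.h>

struct demo_memcg { unsigned long emin, elow; };	/* stand-in type */

/* Sketch of the new contract: both values are always initialized, and
 * the caller decides between them. The old bool in_low_reclaim forced
 * that decision to be made inside the helper, losing information. */
static void demo_protection(const struct demo_memcg *memcg,
			    unsigned long *min, unsigned long *low)
{
	*min = *low = 0;	/* disabled/targeted reclaim case */
	if (!memcg)
		return;
	*min = memcg->emin;
	*low = memcg->elow;
}

int main(void)
{
	struct demo_memcg memcg = { .emin = 100, .elow = 300 };
	unsigned long min, low;

	demo_protection(&memcg, &min, &low);
	printf("min=%lu low=%lu\n", min, low);	/* caller picks per pass */
	return 0;
}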

include/trace/events/mmflags.h

Lines changed: 3 additions & 1 deletion
@@ -48,7 +48,9 @@
 	{(unsigned long)__GFP_WRITE,		"__GFP_WRITE"},		\
 	{(unsigned long)__GFP_RECLAIM,		"__GFP_RECLAIM"},	\
 	{(unsigned long)__GFP_DIRECT_RECLAIM,	"__GFP_DIRECT_RECLAIM"},\
-	{(unsigned long)__GFP_KSWAPD_RECLAIM,	"__GFP_KSWAPD_RECLAIM"}\
+	{(unsigned long)__GFP_KSWAPD_RECLAIM,	"__GFP_KSWAPD_RECLAIM"},\
+	{(unsigned long)__GFP_ZEROTAGS,		"__GFP_ZEROTAGS"},	\
+	{(unsigned long)__GFP_SKIP_KASAN_POISON,"__GFP_SKIP_KASAN_POISON"}\

 #define show_gfp_flags(flags) \
 	(flags) ? __print_flags(flags, "|", \
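For context, show_gfp_flags() prints via a name table like the one above; a bit without an entry surfaces as a raw hex remainder in trace output instead of a symbolic name. A toy model of that lookup (bit positions here are invented, not the real GFP values):

#include <stdio.h>

#define DEMO_GFP_ZEROTAGS		(1UL << 0)	/* invented bit positions */
#define DEMO_GFP_SKIP_KASAN_POISON	(1UL << 1)

static const struct {
	unsigned long mask;
	const char *name;
} gfp_names[] = {
	{ DEMO_GFP_ZEROTAGS,		"__GFP_ZEROTAGS" },
	{ DEMO_GFP_SKIP_KASAN_POISON,	"__GFP_SKIP_KASAN_POISON" },
};

int main(void)
{
	unsigned long flags = DEMO_GFP_ZEROTAGS | DEMO_GFP_SKIP_KASAN_POISON;
	size_t i;

	/* Named bits print symbolically; anything left over has to be
	 * shown as raw hex, which is what the missing entries caused. */
	for (i = 0; i < sizeof(gfp_names) / sizeof(gfp_names[0]); i++) {
		if (flags & gfp_names[i].mask) {
			printf("%s", gfp_names[i].name);
			flags &= ~gfp_names[i].mask;
			if (flags)
				printf("|");
		}
	}
	if (flags)
		printf("0x%lx", flags);	/* unnamed remainder */
	printf("\n");
	return 0;
}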

mm/hugetlb.c

Lines changed: 14 additions & 5 deletions
@@ -2476,7 +2476,7 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
 		if (!rc) {
 			/*
 			 * This indicates there is an entry in the reserve map
-			 * added by alloc_huge_page.  We know it was added
+			 * not added by alloc_huge_page.  We know it was added
 			 * before the alloc_huge_page call, otherwise
 			 * HPageRestoreReserve would be set on the page.
 			 * Remove the entry so that a subsequent allocation
@@ -4660,7 +4660,9 @@ static vm_fault_t hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(&range);
 out_release_all:
-	restore_reserve_on_error(h, vma, haddr, new_page);
+	/* No restore in case of successful pagetable update (Break COW) */
+	if (new_page != old_page)
+		restore_reserve_on_error(h, vma, haddr, new_page);
 	put_page(new_page);
 out_release_old:
 	put_page(old_page);
@@ -4776,7 +4778,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 	pte_t new_pte;
 	spinlock_t *ptl;
 	unsigned long haddr = address & huge_page_mask(h);
-	bool new_page = false;
+	bool new_page, new_pagecache_page = false;

 	/*
 	 * Currently, we are forced to kill the process in the event the
@@ -4799,6 +4801,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 		goto out;

 retry:
+	new_page = false;
 	page = find_lock_page(mapping, idx);
 	if (!page) {
 		/* Check for page in userfault range */
@@ -4842,6 +4845,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 				goto retry;
 			goto out;
 		}
+		new_pagecache_page = true;
 	} else {
 		lock_page(page);
 		if (unlikely(anon_vma_prepare(vma))) {
@@ -4926,7 +4930,9 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 	spin_unlock(ptl);
 backout_unlocked:
 	unlock_page(page);
-	restore_reserve_on_error(h, vma, haddr, page);
+	/* restore reserve for newly allocated pages not in page cache */
+	if (new_page && !new_pagecache_page)
+		restore_reserve_on_error(h, vma, haddr, page);
 	put_page(page);
 	goto out;
 }
@@ -5135,6 +5141,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 	int ret = -ENOMEM;
 	struct page *page;
 	int writable;
+	bool new_pagecache_page = false;

 	if (is_continue) {
 		ret = -EFAULT;
@@ -5228,6 +5235,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 		ret = huge_add_to_page_cache(page, mapping, idx);
 		if (ret)
 			goto out_release_nounlock;
+		new_pagecache_page = true;
 	}

 	ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
@@ -5291,7 +5299,8 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 	if (vm_shared || is_continue)
 		unlock_page(page);
 out_release_nounlock:
-	restore_reserve_on_error(h, dst_vma, dst_addr, page);
+	if (!new_pagecache_page)
+		restore_reserve_on_error(h, dst_vma, dst_addr, page);
 	put_page(page);
 	goto out;
 }
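The common thread of these hunks: restore_reserve_on_error() is only safe for pages the fault path allocated itself and never published to the page cache, since a cached page's reserve map entry must stay. A runnable model of that gating rule (the flag names mirror the diff, but nothing else here is kernel code):

#include <stdbool.h>
#include <stdio.h>

/* Track provenance at allocation time, then consult it on the error
 * path: restore the reservation only for pages this path allocated
 * and never added to the page cache. */
static bool should_restore_reserve(bool new_page, bool new_pagecache_page)
{
	return new_page && !new_pagecache_page;
}

int main(void)
{
	printf("allocated, not in cache:  %d\n", should_restore_reserve(true, false));
	printf("allocated, in cache:      %d\n", should_restore_reserve(true, true));
	printf("found in cache (not new): %d\n", should_restore_reserve(false, false));
	return 0;
}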

mm/memory-failure.c

Lines changed: 9 additions & 3 deletions
@@ -1146,7 +1146,7 @@ static int __get_hwpoison_page(struct page *page)
 	 * unexpected races caused by taking a page refcount.
 	 */
 	if (!HWPoisonHandlable(head))
-		return 0;
+		return -EBUSY;

 	if (PageTransHuge(head)) {
 		/*
@@ -1199,9 +1199,15 @@ static int get_any_page(struct page *p, unsigned long flags)
 			}
 			goto out;
 		} else if (ret == -EBUSY) {
-			/* We raced with freeing huge page to buddy, retry. */
-			if (pass++ < 3)
+			/*
+			 * We raced with (possibly temporary) unhandlable
+			 * page, retry.
+			 */
+			if (pass++ < 3) {
+				shake_page(p, 1);
 				goto try_again;
+			}
+			ret = -EIO;
 			goto out;
 		}
 	}
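The retry loop now treats any unhandlable page as possibly transient: nudge it with shake_page() (which drains per-CPU and LRU caches), retry up to three times, then fail with -EIO rather than reporting a misleading result. A userspace model of that control flow (the simulated page behavior is a stand-in):

#include <errno.h>
#include <stdio.h>

/* Pretend the page is transiently unhandlable and settles on the
 * third attempt, as a race with page freeing might. */
static int try_get_page(int attempt)
{
	return attempt < 2 ? -EBUSY : 0;
}

int main(void)
{
	int pass = 0, ret;

try_again:
	ret = try_get_page(pass);
	if (ret == -EBUSY) {
		if (pass++ < 3) {
			/* the kernel calls shake_page(p, 1) here to drain
			 * per-CPU pagevecs and LRU caches before retrying */
			goto try_again;
		}
		ret = -EIO;	/* still unhandlable: give up explicitly */
	}
	printf("result: %d after %d passes\n", ret, pass);
	return 0;
}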

mm/page_alloc.c

Lines changed: 12 additions & 13 deletions
@@ -3453,19 +3453,10 @@ void free_unref_page_list(struct list_head *list)
 		 * comment in free_unref_page.
 		 */
 		migratetype = get_pcppage_migratetype(page);
-		if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
-			if (unlikely(is_migrate_isolate(migratetype))) {
-				list_del(&page->lru);
-				free_one_page(page_zone(page), page, pfn, 0,
-					      migratetype, FPI_NONE);
-				continue;
-			}
-
-			/*
-			 * Non-isolated types over MIGRATE_PCPTYPES get added
-			 * to the MIGRATE_MOVABLE pcp list.
-			 */
-			set_pcppage_migratetype(page, MIGRATE_MOVABLE);
+		if (unlikely(is_migrate_isolate(migratetype))) {
+			list_del(&page->lru);
+			free_one_page(page_zone(page), page, pfn, 0, migratetype, FPI_NONE);
+			continue;
 		}

 		set_page_private(page, pfn);
@@ -3475,7 +3466,15 @@ void free_unref_page_list(struct list_head *list)
 	list_for_each_entry_safe(page, next, list, lru) {
 		pfn = page_private(page);
 		set_page_private(page, 0);
+
+		/*
+		 * Non-isolated types over MIGRATE_PCPTYPES get added
+		 * to the MIGRATE_MOVABLE pcp list.
+		 */
 		migratetype = get_pcppage_migratetype(page);
+		if (unlikely(migratetype >= MIGRATE_PCPTYPES))
+			migratetype = MIGRATE_MOVABLE;
+
 		trace_mm_page_free_batched(page);
 		free_unref_page_commit(page, pfn, migratetype, 0);
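The bug was that the first loop rewrote the migratetype cached on the page with set_pcppage_migratetype(page, MIGRATE_MOVABLE), corrupting state that later readers depend on; the fix remaps only a local variable at the point of use. A toy illustration of that distinction (the constants are illustrative, not the kernel's values):

#include <stdio.h>

#define MIGRATE_PCPTYPES	3
#define MIGRATE_MOVABLE		1
#define MIGRATE_CMA		4	/* a type above MIGRATE_PCPTYPES */

struct demo_page { int pcppage_migratetype; };

int main(void)
{
	struct demo_page page = { .pcppage_migratetype = MIGRATE_CMA };
	int migratetype = page.pcppage_migratetype;

	/* Remap in a local for list placement; the value stored on the
	 * page stays intact for anyone who reads it later. */
	if (migratetype >= MIGRATE_PCPTYPES)
		migratetype = MIGRATE_MOVABLE;

	printf("pcp list used: %d, stored type preserved: %d\n",
	       migratetype, page.pcppage_migratetype);
	return 0;
}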

mm/shmem.c

Lines changed: 1 addition & 13 deletions
@@ -1696,21 +1696,14 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL;
-	struct swap_info_struct *si;
-	struct page *page = NULL;
+	struct page *page;
 	swp_entry_t swap;
 	int error;

 	VM_BUG_ON(!*pagep || !xa_is_value(*pagep));
 	swap = radix_to_swp_entry(*pagep);
 	*pagep = NULL;

-	/* Prevent swapoff from happening to us. */
-	si = get_swap_device(swap);
-	if (!si) {
-		error = EINVAL;
-		goto failed;
-	}
 	/* Look it up and read it in.. */
 	page = lookup_swap_cache(swap, NULL, 0);
 	if (!page) {
@@ -1772,8 +1765,6 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 	swap_free(swap);

 	*pagep = page;
-	if (si)
-		put_swap_device(si);
 	return 0;
 failed:
 	if (!shmem_confirm_swap(mapping, index, swap))
@@ -1784,9 +1775,6 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 		put_page(page);
 	}

-	if (si)
-		put_swap_device(si);
-
 	return error;
 }

mm/swap_state.c

Lines changed: 0 additions & 7 deletions
@@ -628,13 +628,6 @@ struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	if (!mask)
 		goto skip;

-	/* Test swap type to make sure the dereference is safe */
-	if (likely(si->flags & (SWP_BLKDEV | SWP_FS_OPS))) {
-		struct inode *inode = si->swap_file->f_mapping->host;
-		if (inode_read_congested(inode))
-			goto skip;
-	}
-
 	do_poll = false;
 	/* Read a page_cluster sized and aligned cluster around offset. */
 	start_offset = offset & ~mask;

mm/vmscan.c

Lines changed: 22 additions & 8 deletions
@@ -100,9 +100,12 @@ struct scan_control {
 	unsigned int may_swap:1;

 	/*
-	 * Cgroups are not reclaimed below their configured memory.low,
-	 * unless we threaten to OOM. If any cgroups are skipped due to
-	 * memory.low and nothing was reclaimed, go back for memory.low.
+	 * Cgroup memory below memory.low is protected as long as we
+	 * don't threaten to OOM. If any cgroup is reclaimed at
+	 * reduced force or passed over entirely due to its memory.low
+	 * setting (memcg_low_skipped), and nothing is reclaimed as a
+	 * result, then go back for one more cycle that reclaims the protected
+	 * memory (memcg_low_reclaim) to avert OOM.
 	 */
 	unsigned int memcg_low_reclaim:1;
 	unsigned int memcg_low_skipped:1;
@@ -2537,15 +2540,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 	for_each_evictable_lru(lru) {
 		int file = is_file_lru(lru);
 		unsigned long lruvec_size;
+		unsigned long low, min;
 		unsigned long scan;
-		unsigned long protection;

 		lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
-		protection = mem_cgroup_protection(sc->target_mem_cgroup,
-						   memcg,
-						   sc->memcg_low_reclaim);
+		mem_cgroup_protection(sc->target_mem_cgroup, memcg,
+				      &min, &low);

-		if (protection) {
+		if (min || low) {
 			/*
 			 * Scale a cgroup's reclaim pressure by proportioning
 			 * its current usage to its memory.low or memory.min
@@ -2576,6 +2578,15 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 			 * hard protection.
 			 */
 			unsigned long cgroup_size = mem_cgroup_size(memcg);
+			unsigned long protection;
+
+			/* memory.low scaling, make sure we retry before OOM */
+			if (!sc->memcg_low_reclaim && low > min) {
+				protection = low;
+				sc->memcg_low_skipped = 1;
+			} else {
+				protection = min;
+			}

 			/* Avoid TOCTOU with earlier protection check */
 			cgroup_size = max(cgroup_size, protection);
@@ -4413,11 +4424,13 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 		.may_swap = 1,
 		.reclaim_idx = gfp_zone(gfp_mask),
 	};
+	unsigned long pflags;

 	trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order,
 					   sc.gfp_mask);

 	cond_resched();
+	psi_memstall_enter(&pflags);
 	fs_reclaim_acquire(sc.gfp_mask);
 	/*
 	 * We need to be able to allocate from the reserves for RECLAIM_UNMAP
@@ -4442,6 +4455,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 	current->flags &= ~PF_SWAPWRITE;
 	memalloc_noreclaim_restore(noreclaim_flag);
 	fs_reclaim_release(sc.gfp_mask);
+	psi_memstall_leave(&pflags);

 	trace_mm_vmscan_node_reclaim_end(sc.nr_reclaimed);
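A worked example of the proportional scaling these hunks feed into: scan pressure shrinks as usage approaches the protection value, and with the fix, memory.low is honored on the first pass while memcg_low_skipped arms a retry before declaring OOM. The arithmetic below follows the commented idea; the kernel's exact expression may differ slightly:

#include <stdio.h>

int main(void)
{
	unsigned long lruvec_size = 1000;	/* pages on this LRU */
	unsigned long cgroup_size = 4000;	/* current usage, in pages */
	unsigned long min = 1000, low = 3000;	/* memory.min / memory.low */
	int memcg_low_reclaim = 0;		/* first pass: honor memory.low */
	int memcg_low_skipped = 0;
	unsigned long protection, scan;

	if (!memcg_low_reclaim && low > min) {
		protection = low;
		memcg_low_skipped = 1;	/* retry without low before OOMing */
	} else {
		protection = min;
	}

	if (cgroup_size < protection)
		cgroup_size = protection;	/* TOCTOU guard, as in the hunk */

	scan = lruvec_size - lruvec_size * protection / cgroup_size;
	printf("scan %lu of %lu pages (low_skipped=%d)\n",
	       scan, lruvec_size, memcg_low_skipped);	/* scan 250 of 1000 */
	return 0;
}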
