Commit 54c2354

Merge tag 'mm-hotfixes-stable-2024-04-18-14-41' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull misc fixes from Andrew Morton:
 "15 hotfixes. 9 are cc:stable and the remainder address post-6.8
  issues or aren't considered suitable for backporting.

  There are a significant number of fixups for this cycle's page_owner
  changes (series "page_owner: print stacks and their outstanding
  allocations"). Apart from that, singleton changes all over, mainly in
  MM"

* tag 'mm-hotfixes-stable-2024-04-18-14-41' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  nilfs2: fix OOB in nilfs_set_de_type
  MAINTAINERS: update Naoya Horiguchi's email address
  fork: defer linking file vma until vma is fully initialized
  mm/shmem: inline shmem_is_huge() for disabled transparent hugepages
  mm,page_owner: defer enablement of static branch
  Squashfs: check the inode number is not the invalid value of zero
  mm,swapops: update check in is_pfn_swap_entry for hwpoison entries
  mm/memory-failure: fix deadlock when hugetlb_optimize_vmemmap is enabled
  mm/userfaultfd: allow hugetlb change protection upon poison entry
  mm,page_owner: fix printing of stack records
  mm,page_owner: fix accounting of pages when migrating
  mm,page_owner: fix refcount imbalance
  mm,page_owner: update metadata for tail pages
  userfaultfd: change src_folio after ensuring it's unpinned in UFFDIO_MOVE
  mm/madvise: make MADV_POPULATE_(READ|WRITE) handle VM_FAULT_RETRY properly
2 parents 2668e3a + c4a7dc9 commit 54c2354

16 files changed, 280 insertions(+), 223 deletions(-)

.mailmap

Lines changed: 2 additions & 1 deletion

@@ -446,7 +446,8 @@ Mythri P K <[email protected]>
 Nadia Yvette Chambers <[email protected]> William Lee Irwin III <[email protected]>
-Naoya Horiguchi <naoya.horiguchi@nec.com> <n-horiguchi@ah.jp.nec.com>
+Naoya Horiguchi <nao.horiguchi@gmail.com> <n-horiguchi@ah.jp.nec.com>
+Naoya Horiguchi <nao.horiguchi@gmail.com> <naoya.horiguchi@nec.com>

Documentation/mm/page_owner.rst

Lines changed: 38 additions & 35 deletions

@@ -24,10 +24,10 @@ fragmentation statistics can be obtained through gfp flag information of
 each page. It is already implemented and activated if page owner is
 enabled. Other usages are more than welcome.
 
-It can also be used to show all the stacks and their outstanding
-allocations, which gives us a quick overview of where the memory is going
-without the need to screen through all the pages and match the allocation
-and free operation.
+It can also be used to show all the stacks and their current number of
+allocated base pages, which gives us a quick overview of where the memory
+is going without the need to screen through all the pages and match the
+allocation and free operation.
 
 page owner is disabled by default. So, if you'd like to use it, you need
 to add "page_owner=on" to your boot cmdline. If the kernel is built
@@ -75,42 +75,45 @@ Usage
 
    cat /sys/kernel/debug/page_owner_stacks/show_stacks > stacks.txt
    cat stacks.txt
-    prep_new_page+0xa9/0x120
-    get_page_from_freelist+0x7e6/0x2140
-    __alloc_pages+0x18a/0x370
-    new_slab+0xc8/0x580
-    ___slab_alloc+0x1f2/0xaf0
-    __slab_alloc.isra.86+0x22/0x40
-    kmem_cache_alloc+0x31b/0x350
-    __khugepaged_enter+0x39/0x100
-    dup_mmap+0x1c7/0x5ce
-    copy_process+0x1afe/0x1c90
-    kernel_clone+0x9a/0x3c0
-    __do_sys_clone+0x66/0x90
-    do_syscall_64+0x7f/0x160
-    entry_SYSCALL_64_after_hwframe+0x6c/0x74
-   stack_count: 234
+    post_alloc_hook+0x177/0x1a0
+    get_page_from_freelist+0xd01/0xd80
+    __alloc_pages+0x39e/0x7e0
+    allocate_slab+0xbc/0x3f0
+    ___slab_alloc+0x528/0x8a0
+    kmem_cache_alloc+0x224/0x3b0
+    sk_prot_alloc+0x58/0x1a0
+    sk_alloc+0x32/0x4f0
+    inet_create+0x427/0xb50
+    __sock_create+0x2e4/0x650
+    inet_ctl_sock_create+0x30/0x180
+    igmp_net_init+0xc1/0x130
+    ops_init+0x167/0x410
+    setup_net+0x304/0xa60
+    copy_net_ns+0x29b/0x4a0
+    create_new_namespaces+0x4a1/0x820
+   nr_base_pages: 16
   ...
   ...
   echo 7000 > /sys/kernel/debug/page_owner_stacks/count_threshold
   cat /sys/kernel/debug/page_owner_stacks/show_stacks> stacks_7000.txt
   cat stacks_7000.txt
-    prep_new_page+0xa9/0x120
-    get_page_from_freelist+0x7e6/0x2140
-    __alloc_pages+0x18a/0x370
-    alloc_pages_mpol+0xdf/0x1e0
-    folio_alloc+0x14/0x50
-    filemap_alloc_folio+0xb0/0x100
-    page_cache_ra_unbounded+0x97/0x180
-    filemap_fault+0x4b4/0x1200
-    __do_fault+0x2d/0x110
-    do_pte_missing+0x4b0/0xa30
-    __handle_mm_fault+0x7fa/0xb70
-    handle_mm_fault+0x125/0x300
-    do_user_addr_fault+0x3c9/0x840
-    exc_page_fault+0x68/0x150
-    asm_exc_page_fault+0x22/0x30
-   stack_count: 8248
+    post_alloc_hook+0x177/0x1a0
+    get_page_from_freelist+0xd01/0xd80
+    __alloc_pages+0x39e/0x7e0
+    alloc_pages_mpol+0x22e/0x490
+    folio_alloc+0xd5/0x110
+    filemap_alloc_folio+0x78/0x230
+    page_cache_ra_order+0x287/0x6f0
+    filemap_get_pages+0x517/0x1160
+    filemap_read+0x304/0x9f0
+    xfs_file_buffered_read+0xe6/0x1d0 [xfs]
+    xfs_file_read_iter+0x1f0/0x380 [xfs]
+    __kernel_read+0x3b9/0x730
+    kernel_read_file+0x309/0x4d0
+    __do_sys_finit_module+0x381/0x730
+    do_syscall_64+0x8d/0x150
+    entry_SYSCALL_64_after_hwframe+0x62/0x6a
+   nr_base_pages: 20824
   ...
 
   cat /sys/kernel/debug/page_owner > page_owner_full.txt
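
The record format above is easy to script against: every stack record ends with a single nr_base_pages line. As a quick cross-check of where memory is going, a small reader can total those counters. This is a minimal sketch, not part of the patch; it assumes page_owner=on on the boot cmdline and debugfs mounted at /sys/kernel/debug:

    #include <stdio.h>

    int main(void)
    {
        /* Reads the same file shown in the documentation above. */
        FILE *f = fopen("/sys/kernel/debug/page_owner_stacks/show_stacks", "r");
        char line[512];
        unsigned long long n, total = 0;

        if (!f) {
            perror("fopen");
            return 1;
        }
        while (fgets(line, sizeof(line), f)) {
            /* Each stack record ends with a "nr_base_pages: N" line. */
            if (sscanf(line, " nr_base_pages: %llu", &n) == 1)
                total += n;
        }
        fclose(f);
        printf("total outstanding base pages: %llu\n", total);
        return 0;
    }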

MAINTAINERS

Lines changed: 1 addition & 1 deletion

@@ -10024,7 +10024,7 @@ F: drivers/media/platform/st/sti/hva
 
 HWPOISON MEMORY FAILURE HANDLING
 M:	Miaohe Lin <[email protected]>
-R:	Naoya Horiguchi <naoya.horiguchi@nec.com>
+R:	Naoya Horiguchi <nao.horiguchi@gmail.com>
 L:	linux-mm@kvack.org
 S:	Maintained
 F:	mm/hwpoison-inject.c

fs/nilfs2/dir.c

Lines changed: 1 addition & 1 deletion

@@ -240,7 +240,7 @@ nilfs_filetype_table[NILFS_FT_MAX] = {
 
 #define S_SHIFT 12
 static unsigned char
-nilfs_type_by_mode[S_IFMT >> S_SHIFT] = {
+nilfs_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
 	[S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE,
 	[S_IFDIR >> S_SHIFT] = NILFS_FT_DIR,
 	[S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV,
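
Why the +1 matters: S_IFMT is 0xF000, so an index computed as (mode & S_IFMT) >> S_SHIFT can be as large as 0xF == 15, while the old array only had valid indices 0..14. A corrupted on-disk mode with all four type bits set therefore read one element past the end. A minimal userspace illustration of the arithmetic:

    #include <stdio.h>
    #include <sys/stat.h>

    #define S_SHIFT 12

    int main(void)
    {
        /* S_IFMT == 0xF000: the four file-type bits of a mode field. */
        printf("highest possible index: %d\n", S_IFMT >> S_SHIFT);  /* 15 */
        printf("old size: %d (valid 0..14), new size: %d (valid 0..15)\n",
               S_IFMT >> S_SHIFT, (S_IFMT >> S_SHIFT) + 1);
        return 0;
    }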

fs/squashfs/inode.c

Lines changed: 4 additions & 1 deletion

@@ -48,6 +48,10 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
 	gid_t i_gid;
 	int err;
 
+	inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
+	if (inode->i_ino == 0)
+		return -EINVAL;
+
 	err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &i_uid);
 	if (err)
 		return err;
@@ -58,7 +62,6 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
 
 	i_uid_write(inode, i_uid);
 	i_gid_write(inode, i_gid);
-	inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
 	inode_set_mtime(inode, le32_to_cpu(sqsh_ino->mtime), 0);
 	inode_set_atime(inode, inode_get_mtime_sec(inode), 0);
 	inode_set_ctime(inode, inode_get_mtime_sec(inode), 0);
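
The fix moves the inode-number load ahead of everything else so an invalid value from a crafted or corrupted image is rejected before any other initialization happens. A userspace model of the added check, with glibc's le32toh() standing in for the kernel's le32_to_cpu() (parse_inode_number is a made-up name):

    #include <endian.h>
    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Convert the on-disk little-endian value first, then reject the
     * invalid inode number 0 before doing any further per-inode work. */
    static int parse_inode_number(uint32_t on_disk_le, uint32_t *ino_out)
    {
        uint32_t ino = le32toh(on_disk_le);

        if (ino == 0)
            return -EINVAL; /* 0 is never a valid inode number */
        *ino_out = ino;
        return 0;
    }

    int main(void)
    {
        uint32_t ino;

        printf("zero rejected: %d\n", parse_inode_number(0, &ino));       /* -22 */
        printf("valid accepted: %d\n", parse_inode_number(htole32(42), &ino));
        return 0;
    }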

include/linux/shmem_fs.h

Lines changed: 9 additions & 0 deletions

@@ -110,8 +110,17 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
 extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
 int shmem_unuse(unsigned int type);
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
 			  struct mm_struct *mm, unsigned long vm_flags);
+#else
+static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
+					  struct mm_struct *mm, unsigned long vm_flags)
+{
+	return false;
+}
+#endif
+
 #ifdef CONFIG_SHMEM
 extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
 #else
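
This is the standard config-stub idiom: when the feature is compiled out, the header supplies a static inline that returns a constant, so callers need no #ifdef and the compiler eliminates the dead branch. A generic sketch of the idiom with made-up names (CONFIG_MY_FEATURE, my_feature_active):

    #include <stdbool.h>

    #ifdef CONFIG_MY_FEATURE
    bool my_feature_active(void);       /* real implementation elsewhere */
    #else
    static inline bool my_feature_active(void)
    {
        return false;   /* constant-folds: callers' branches are eliminated */
    }
    #endif

    void caller_example(void)
    {
        if (my_feature_active()) {
            /* compiled away entirely when CONFIG_MY_FEATURE is unset */
        }
    }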

include/linux/swapops.h

Lines changed: 33 additions & 32 deletions

@@ -390,6 +390,35 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry)
 }
 #endif /* CONFIG_MIGRATION */
 
+#ifdef CONFIG_MEMORY_FAILURE
+
+/*
+ * Support for hardware poisoned pages
+ */
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+	BUG_ON(!PageLocked(page));
+	return swp_entry(SWP_HWPOISON, page_to_pfn(page));
+}
+
+static inline int is_hwpoison_entry(swp_entry_t entry)
+{
+	return swp_type(entry) == SWP_HWPOISON;
+}
+
+#else
+
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+	return swp_entry(0, 0);
+}
+
+static inline int is_hwpoison_entry(swp_entry_t swp)
+{
+	return 0;
+}
+#endif
+
 typedef unsigned long pte_marker;
 
 #define PTE_MARKER_UFFD_WP BIT(0)
@@ -483,16 +512,17 @@ static inline struct folio *pfn_swap_entry_folio(swp_entry_t entry)
 
 /*
  * A pfn swap entry is a special type of swap entry that always has a pfn stored
- * in the swap offset. They are used to represent unaddressable device memory
- * and to restrict access to a page undergoing migration.
+ * in the swap offset. They can either be used to represent unaddressable device
+ * memory, to restrict access to a page undergoing migration or to represent a
+ * pfn which has been hwpoisoned and unmapped.
  */
 static inline bool is_pfn_swap_entry(swp_entry_t entry)
 {
 	/* Make sure the swp offset can always store the needed fields */
 	BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS);
 
 	return is_migration_entry(entry) || is_device_private_entry(entry) ||
-	       is_device_exclusive_entry(entry);
+	       is_device_exclusive_entry(entry) || is_hwpoison_entry(entry);
 }
 
 struct page_vma_mapped_walk;
@@ -561,35 +591,6 @@ static inline int is_pmd_migration_entry(pmd_t pmd)
 }
 #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
 
-#ifdef CONFIG_MEMORY_FAILURE
-
-/*
- * Support for hardware poisoned pages
- */
-static inline swp_entry_t make_hwpoison_entry(struct page *page)
-{
-	BUG_ON(!PageLocked(page));
-	return swp_entry(SWP_HWPOISON, page_to_pfn(page));
-}
-
-static inline int is_hwpoison_entry(swp_entry_t entry)
-{
-	return swp_type(entry) == SWP_HWPOISON;
-}
-
-#else
-
-static inline swp_entry_t make_hwpoison_entry(struct page *page)
-{
-	return swp_entry(0, 0);
-}
-
-static inline int is_hwpoison_entry(swp_entry_t swp)
-{
-	return 0;
-}
-#endif
-
 static inline int non_swap_entry(swp_entry_t entry)
 {
 	return swp_type(entry) >= MAX_SWAPFILES;
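
The is_pfn_swap_entry() change is needed because a hwpoison entry stores the poisoned page's pfn in the swap offset, exactly like migration and device-private entries do. A simplified userspace model of the encoding; the shift and type values here are illustrative, not the kernel's real constants:

    #include <stdbool.h>
    #include <stdio.h>

    #define SWP_TYPE_SHIFT 58   /* illustrative: high bits hold the type */
    #define SWP_HWPOISON   5    /* illustrative type id */

    typedef struct { unsigned long val; } swp_entry_t;

    static swp_entry_t swp_entry(unsigned long type, unsigned long offset)
    {
        return (swp_entry_t){ (type << SWP_TYPE_SHIFT) | offset };
    }

    static unsigned long swp_type(swp_entry_t e)
    {
        return e.val >> SWP_TYPE_SHIFT;
    }

    static unsigned long swp_offset(swp_entry_t e)
    {
        return e.val & ((1UL << SWP_TYPE_SHIFT) - 1);
    }

    static bool is_hwpoison_entry(swp_entry_t e)
    {
        return swp_type(e) == SWP_HWPOISON;
    }

    int main(void)
    {
        /* The pfn of the poisoned page lives in the offset field, which
         * is why the pfn-entry predicate must accept hwpoison entries. */
        swp_entry_t e = swp_entry(SWP_HWPOISON, 0x1234);

        printf("hwpoison=%d pfn=0x%lx\n", is_hwpoison_entry(e), swp_offset(e));
        return 0;
    }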

kernel/fork.c

Lines changed: 17 additions & 16 deletions

@@ -714,6 +714,23 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		} else if (anon_vma_fork(tmp, mpnt))
 			goto fail_nomem_anon_vma_fork;
 		vm_flags_clear(tmp, VM_LOCKED_MASK);
+		/*
+		 * Copy/update hugetlb private vma information.
+		 */
+		if (is_vm_hugetlb_page(tmp))
+			hugetlb_dup_vma_private(tmp);
+
+		/*
+		 * Link the vma into the MT. After using __mt_dup(), memory
+		 * allocation is not necessary here, so it cannot fail.
+		 */
+		vma_iter_bulk_store(&vmi, tmp);
+
+		mm->map_count++;
+
+		if (tmp->vm_ops && tmp->vm_ops->open)
+			tmp->vm_ops->open(tmp);
+
 		file = tmp->vm_file;
 		if (file) {
 			struct address_space *mapping = file->f_mapping;
@@ -730,25 +747,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 			i_mmap_unlock_write(mapping);
 		}
 
-		/*
-		 * Copy/update hugetlb private vma information.
-		 */
-		if (is_vm_hugetlb_page(tmp))
-			hugetlb_dup_vma_private(tmp);
-
-		/*
-		 * Link the vma into the MT. After using __mt_dup(), memory
-		 * allocation is not necessary here, so it cannot fail.
-		 */
-		vma_iter_bulk_store(&vmi, tmp);
-
-		mm->map_count++;
-
 		if (!(tmp->vm_flags & VM_WIPEONFORK))
 			retval = copy_page_range(tmp, mpnt);
 
-		if (tmp->vm_ops && tmp->vm_ops->open)
-			tmp->vm_ops->open(tmp);
-
 		if (retval) {
 			mpnt = vma_next(&vmi);
 			goto loop_out;
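
The reordering enforces the usual publish-after-init rule: the vma must only become reachable to concurrent walkers (here, via the file's i_mmap tree) once it is fully initialized. A schematic of the pattern with made-up types (widget, global_list); real kernel code additionally needs the appropriate lock or barriers:

    struct widget {
        int value;              /* must be valid before anyone can see it */
        struct widget *next;
    };

    static struct widget *global_list;  /* shared, walked by other threads */

    static void init_and_publish(struct widget *w, int value)
    {
        /* 1. Finish all initialization first... */
        w->value = value;

        /* 2. ...then link into the shared structure, making it visible.
         *    Publishing first would let a concurrent walker observe a
         *    half-initialized object -- the bug class this fix removes. */
        w->next = global_list;
        global_list = w;
    }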

mm/gup.c

Lines changed: 32 additions & 22 deletions

@@ -1206,6 +1206,22 @@ static long __get_user_pages(struct mm_struct *mm,
 
 		/* first iteration or cross vma bound */
 		if (!vma || start >= vma->vm_end) {
+			/*
+			 * MADV_POPULATE_(READ|WRITE) wants to handle VMA
+			 * lookups+error reporting differently.
+			 */
+			if (gup_flags & FOLL_MADV_POPULATE) {
+				vma = vma_lookup(mm, start);
+				if (!vma) {
+					ret = -ENOMEM;
+					goto out;
+				}
+				if (check_vma_flags(vma, gup_flags)) {
+					ret = -EINVAL;
+					goto out;
+				}
+				goto retry;
+			}
 			vma = gup_vma_lookup(mm, start);
 			if (!vma && in_gate_area(mm, start)) {
 				ret = get_gate_page(mm, start & PAGE_MASK,
@@ -1685,35 +1701,35 @@ long populate_vma_page_range(struct vm_area_struct *vma,
 }
 
 /*
- * faultin_vma_page_range() - populate (prefault) page tables inside the
- *			      given VMA range readable/writable
+ * faultin_page_range() - populate (prefault) page tables inside the
+ *			  given range readable/writable
  *
 * This takes care of mlocking the pages, too, if VM_LOCKED is set.
 *
- * @vma: target vma
+ * @mm: the mm to populate page tables in
 * @start: start address
 * @end: end address
 * @write: whether to prefault readable or writable
 * @locked: whether the mmap_lock is still held
 *
- * Returns either number of processed pages in the vma, or a negative error
- * code on error (see __get_user_pages()).
+ * Returns either number of processed pages in the MM, or a negative error
+ * code on error (see __get_user_pages()). Note that this function reports
+ * errors related to VMAs, such as incompatible mappings, as expected by
+ * MADV_POPULATE_(READ|WRITE).
 *
- * vma->vm_mm->mmap_lock must be held. The range must be page-aligned and
- * covered by the VMA. If it's released, *@locked will be set to 0.
+ * The range must be page-aligned.
+ *
+ * mm->mmap_lock must be held. If it's released, *@locked will be set to 0.
 */
-long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
-			    unsigned long end, bool write, int *locked)
+long faultin_page_range(struct mm_struct *mm, unsigned long start,
+			unsigned long end, bool write, int *locked)
 {
-	struct mm_struct *mm = vma->vm_mm;
 	unsigned long nr_pages = (end - start) / PAGE_SIZE;
 	int gup_flags;
 	long ret;
 
 	VM_BUG_ON(!PAGE_ALIGNED(start));
 	VM_BUG_ON(!PAGE_ALIGNED(end));
-	VM_BUG_ON_VMA(start < vma->vm_start, vma);
-	VM_BUG_ON_VMA(end > vma->vm_end, vma);
 	mmap_assert_locked(mm);
 
 	/*
@@ -1725,19 +1741,13 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
 	 * a poisoned page.
 	 * !FOLL_FORCE: Require proper access permissions.
 	 */
-	gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE;
+	gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE |
+		    FOLL_MADV_POPULATE;
 	if (write)
 		gup_flags |= FOLL_WRITE;
 
-	/*
-	 * We want to report -EINVAL instead of -EFAULT for any permission
-	 * problems or incompatible mappings.
-	 */
-	if (check_vma_flags(vma, gup_flags))
-		return -EINVAL;
-
-	ret = __get_user_pages(mm, start, nr_pages, gup_flags,
-			       NULL, locked);
+	ret = __get_user_pages_locked(mm, start, nr_pages, NULL, locked,
+				      gup_flags);
 	lru_add_drain();
 	return ret;
 }
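
From userspace these paths are reached through madvise(). A minimal example that exercises the prefault path the fix touches; MADV_POPULATE_READ is 22 in the uapi headers, and the fallback define is only for older libc headers:

    #include <stdio.h>
    #include <sys/mman.h>

    #ifndef MADV_POPULATE_READ
    #define MADV_POPULATE_READ 22   /* from uapi asm-generic/mman-common.h */
    #endif

    int main(void)
    {
        size_t len = 16 * 4096;
        char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED) {
            perror("mmap");
            return 1;
        }

        /* Prefault the range for reading. With the fix, a VM_FAULT_RETRY
         * (which drops mmap_lock) makes the kernel re-look-up the VMA
         * instead of reusing a potentially stale vma pointer. */
        if (madvise(p, len, MADV_POPULATE_READ))
            perror("madvise(MADV_POPULATE_READ)");

        munmap(p, len);
        return 0;
    }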
