
Commit 4dd5815

Merge branch 'akpm' (patches from Andrew)

Merge more fixes from Andrew Morton: "17 fixes". Mostly mm fixes and one
ocfs2 locking fix.

* emailed patches from Andrew Morton <[email protected]>:
  mm: memcontrol: fix network errors from failing __GFP_ATOMIC charges
  mm/memory_hotplug: fix updating the node span
  scripts/gdb: fix debugging modules compiled with hot/cold partitioning
  mm: slab: make page_cgroup_ino() to recognize non-compound slab pages properly
  MAINTAINERS: update information for "MEMORY MANAGEMENT"
  dump_stack: avoid the livelock of the dump_lock
  zswap: add Vitaly to the maintainers list
  mm/page_alloc.c: ratelimit allocation failure warnings more aggressively
  mm/khugepaged: fix might_sleep() warn with CONFIG_HIGHPTE=y
  mm, vmstat: reduce zone->lock holding time by /proc/pagetypeinfo
  mm, vmstat: hide /proc/pagetypeinfo from normal users
  mm/mmu_notifiers: use the right return code for WARN_ON
  ocfs2: protect extent tree in ocfs2_prepare_inode_for_write()
  mm: thp: handle page cache THP correctly in PageTransCompoundMap
  mm, meminit: recalculate pcpu batch and high limits after init completes
  mm/gup_benchmark: fix MAP_HUGETLB case
  mm: memcontrol: fix NULL-ptr deref in percpu stats flush

2 parents 26bc672 + 869712f

File tree

15 files changed, +188 −79 lines


MAINTAINERS
Lines changed: 5 additions & 0 deletions

@@ -10519,8 +10519,12 @@ F: mm/memblock.c
 F:      Documentation/core-api/boot-time-mm.rst
 
 MEMORY MANAGEMENT
+M:      Andrew Morton <[email protected]>
 L:      [email protected]
 W:      http://www.linux-mm.org
+T:      quilt https://ozlabs.org/~akpm/mmotm/
+T:      quilt https://ozlabs.org/~akpm/mmots/
+T:      git git://github.com/hnaz/linux-mm.git
 S:      Maintained
 F:      include/linux/mm.h
 F:      include/linux/gfp.h
@@ -18034,6 +18038,7 @@ F: Documentation/vm/zsmalloc.rst
 ZSWAP COMPRESSED SWAP CACHING
 M:      Seth Jennings <[email protected]>
 M:      Dan Streetman <[email protected]>
+M:      Vitaly Wool <[email protected]>
 L:      [email protected]
 S:      Maintained
 F:      mm/zswap.c

fs/ocfs2/file.c
Lines changed: 90 additions & 44 deletions

@@ -2098,53 +2098,89 @@ static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos)
         return 0;
 }
 
-static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
-                                            struct file *file,
-                                            loff_t pos, size_t count,
-                                            int *meta_level)
+static int ocfs2_inode_lock_for_extent_tree(struct inode *inode,
+                                            struct buffer_head **di_bh,
+                                            int meta_level,
+                                            int overwrite_io,
+                                            int write_sem,
+                                            int wait)
 {
-        int ret;
-        struct buffer_head *di_bh = NULL;
-        u32 cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
-        u32 clusters =
-                ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos;
+        int ret = 0;
 
-        ret = ocfs2_inode_lock(inode, &di_bh, 1);
-        if (ret) {
-                mlog_errno(ret);
+        if (wait)
+                ret = ocfs2_inode_lock(inode, NULL, meta_level);
+        else
+                ret = ocfs2_try_inode_lock(inode,
+                        overwrite_io ? NULL : di_bh, meta_level);
+        if (ret < 0)
                 goto out;
+
+        if (wait) {
+                if (write_sem)
+                        down_write(&OCFS2_I(inode)->ip_alloc_sem);
+                else
+                        down_read(&OCFS2_I(inode)->ip_alloc_sem);
+        } else {
+                if (write_sem)
+                        ret = down_write_trylock(&OCFS2_I(inode)->ip_alloc_sem);
+                else
+                        ret = down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem);
+
+                if (!ret) {
+                        ret = -EAGAIN;
+                        goto out_unlock;
+                }
         }
 
-        *meta_level = 1;
+        return ret;
 
-        ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
-        if (ret)
-                mlog_errno(ret);
+out_unlock:
+        brelse(*di_bh);
+        ocfs2_inode_unlock(inode, meta_level);
 out:
-        brelse(di_bh);
         return ret;
 }
 
+static void ocfs2_inode_unlock_for_extent_tree(struct inode *inode,
+                                               struct buffer_head **di_bh,
+                                               int meta_level,
+                                               int write_sem)
+{
+        if (write_sem)
+                up_write(&OCFS2_I(inode)->ip_alloc_sem);
+        else
+                up_read(&OCFS2_I(inode)->ip_alloc_sem);
+
+        brelse(*di_bh);
+        *di_bh = NULL;
+
+        if (meta_level >= 0)
+                ocfs2_inode_unlock(inode, meta_level);
+}
+
 static int ocfs2_prepare_inode_for_write(struct file *file,
                                          loff_t pos, size_t count, int wait)
 {
         int ret = 0, meta_level = 0, overwrite_io = 0;
+        int write_sem = 0;
         struct dentry *dentry = file->f_path.dentry;
         struct inode *inode = d_inode(dentry);
         struct buffer_head *di_bh = NULL;
+        u32 cpos;
+        u32 clusters;
 
         /*
          * We start with a read level meta lock and only jump to an ex
          * if we need to make modifications here.
          */
         for(;;) {
-                if (wait)
-                        ret = ocfs2_inode_lock(inode, NULL, meta_level);
-                else
-                        ret = ocfs2_try_inode_lock(inode,
-                                overwrite_io ? NULL : &di_bh, meta_level);
+                ret = ocfs2_inode_lock_for_extent_tree(inode,
+                                                       &di_bh,
+                                                       meta_level,
+                                                       overwrite_io,
+                                                       write_sem,
+                                                       wait);
                 if (ret < 0) {
-                        meta_level = -1;
                         if (ret != -EAGAIN)
                                 mlog_errno(ret);
                         goto out;
@@ -2156,15 +2192,8 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
                  */
                 if (!wait && !overwrite_io) {
                         overwrite_io = 1;
-                        if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) {
-                                ret = -EAGAIN;
-                                goto out_unlock;
-                        }
 
                         ret = ocfs2_overwrite_io(inode, di_bh, pos, count);
-                        brelse(di_bh);
-                        di_bh = NULL;
-                        up_read(&OCFS2_I(inode)->ip_alloc_sem);
                         if (ret < 0) {
                                 if (ret != -EAGAIN)
                                         mlog_errno(ret);
@@ -2183,7 +2212,10 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
                  * set inode->i_size at the end of a write. */
                 if (should_remove_suid(dentry)) {
                         if (meta_level == 0) {
-                                ocfs2_inode_unlock(inode, meta_level);
+                                ocfs2_inode_unlock_for_extent_tree(inode,
+                                                                   &di_bh,
+                                                                   meta_level,
+                                                                   write_sem);
                                 meta_level = 1;
                                 continue;
                         }
@@ -2197,18 +2229,32 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
 
                 ret = ocfs2_check_range_for_refcount(inode, pos, count);
                 if (ret == 1) {
-                        ocfs2_inode_unlock(inode, meta_level);
-                        meta_level = -1;
-
-                        ret = ocfs2_prepare_inode_for_refcount(inode,
-                                                               file,
-                                                               pos,
-                                                               count,
-                                                               &meta_level);
+                        ocfs2_inode_unlock_for_extent_tree(inode,
+                                                           &di_bh,
+                                                           meta_level,
+                                                           write_sem);
+                        ret = ocfs2_inode_lock_for_extent_tree(inode,
+                                                               &di_bh,
+                                                               meta_level,
+                                                               overwrite_io,
+                                                               1,
+                                                               wait);
+                        write_sem = 1;
+                        if (ret < 0) {
+                                if (ret != -EAGAIN)
+                                        mlog_errno(ret);
+                                goto out;
+                        }
+
+                        cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
+                        clusters =
+                                ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos;
+                        ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
                 }
 
                 if (ret < 0) {
-                        mlog_errno(ret);
+                        if (ret != -EAGAIN)
+                                mlog_errno(ret);
                         goto out_unlock;
                 }
 
@@ -2219,10 +2265,10 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
         trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
                                             pos, count, wait);
 
-        brelse(di_bh);
-
-        if (meta_level >= 0)
-                ocfs2_inode_unlock(inode, meta_level);
+        ocfs2_inode_unlock_for_extent_tree(inode,
+                                           &di_bh,
+                                           meta_level,
+                                           write_sem);
 
 out:
         return ret;
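Note on the fix above: it replaces ocfs2_prepare_inode_for_refcount() with a pair of helpers that take the cluster lock and ip_alloc_sem together, in a fixed order, and release them symmetrically, so the extent tree can no longer change between the refcount check and the CoW. Below is a minimal userspace sketch of the same lock-pairing discipline, using pthreads as stand-ins for the ocfs2 primitives; every name in it is illustrative, not the real API.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t cluster_lock = PTHREAD_MUTEX_INITIALIZER; /* outer */
static pthread_rwlock_t alloc_sem = PTHREAD_RWLOCK_INITIALIZER;  /* inner */

/* Take outer then inner; the non-blocking path backs out with -EAGAIN. */
static int lock_for_extent_tree(int write_sem, int wait)
{
        if (wait)
                pthread_mutex_lock(&cluster_lock);
        else if (pthread_mutex_trylock(&cluster_lock))
                return -EAGAIN;

        if (wait) {
                if (write_sem)
                        pthread_rwlock_wrlock(&alloc_sem);
                else
                        pthread_rwlock_rdlock(&alloc_sem);
                return 0;
        }

        if (write_sem ? pthread_rwlock_trywrlock(&alloc_sem)
                      : pthread_rwlock_tryrdlock(&alloc_sem)) {
                /* inner trylock failed: undo the outer lock, like out_unlock */
                pthread_mutex_unlock(&cluster_lock);
                return -EAGAIN;
        }
        return 0;
}

static void unlock_for_extent_tree(void)
{
        /* release in reverse order of acquisition */
        pthread_rwlock_unlock(&alloc_sem);
        pthread_mutex_unlock(&cluster_lock);
}

int main(void)
{
        if (lock_for_extent_tree(1, 1) == 0) {
                /* ... extent tree changes would happen here ... */
                unlock_for_extent_tree();
        }
        puts("paired lock/unlock ok");
        return 0;
}

The property worth noting is that every exit path leaves both locks in a consistent state, which is what the old code lost when it dropped and re-took the inode lock around the refcount path.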

include/linux/mm.h
Lines changed: 0 additions & 5 deletions

@@ -695,11 +695,6 @@ static inline void *kvcalloc(size_t n, size_t size, gfp_t flags)
 
 extern void kvfree(const void *addr);
 
-static inline atomic_t *compound_mapcount_ptr(struct page *page)
-{
-        return &page[1].compound_mapcount;
-}
-
 static inline int compound_mapcount(struct page *page)
 {
         VM_BUG_ON_PAGE(!PageCompound(page), page);

include/linux/mm_types.h
Lines changed: 5 additions & 0 deletions

@@ -221,6 +221,11 @@ struct page {
 #endif
 } _struct_page_alignment;
 
+static inline atomic_t *compound_mapcount_ptr(struct page *page)
+{
+        return &page[1].compound_mapcount;
+}
+
 /*
  * Used for sizing the vmemmap region on some architectures
  */

include/linux/page-flags.h
Lines changed: 18 additions & 2 deletions

@@ -622,12 +622,28 @@ static inline int PageTransCompound(struct page *page)
  *
  * Unlike PageTransCompound, this is safe to be called only while
  * split_huge_pmd() cannot run from under us, like if protected by the
- * MMU notifier, otherwise it may result in page->_mapcount < 0 false
+ * MMU notifier, otherwise it may result in page->_mapcount check false
  * positives.
+ *
+ * We have to treat page cache THP differently since every subpage of it
+ * would get _mapcount inc'ed once it is PMD mapped.  But, it may be PTE
+ * mapped in the current process so comparing subpage's _mapcount to
+ * compound_mapcount to filter out PTE mapped case.
  */
 static inline int PageTransCompoundMap(struct page *page)
 {
-        return PageTransCompound(page) && atomic_read(&page->_mapcount) < 0;
+        struct page *head;
+
+        if (!PageTransCompound(page))
+                return 0;
+
+        if (PageAnon(page))
+                return atomic_read(&page->_mapcount) < 0;
+
+        head = compound_head(page);
+        /* File THP is PMD mapped and not PTE mapped */
+        return atomic_read(&page->_mapcount) ==
+               atomic_read(compound_mapcount_ptr(head));
 }
 
 /*
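For anonymous THP a tail page's _mapcount stays at -1 until the page is PTE-mapped, so the old `_mapcount < 0` test was enough. File-backed THP additionally bumps every subpage's _mapcount when the compound page is PMD-mapped, so the new code compares the subpage count against the head's compound_mapcount instead. A standalone model of that decision follows; the types and counters are simplified stand-ins, not the kernel's struct page, and the PageTransCompound() gate is omitted.

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for struct page fields; counters start at -1 like _mapcount. */
struct page {
        bool anon;
        int mapcount;          /* models page->_mapcount */
        int compound_mapcount; /* meaningful on the head page only */
        struct page *head;
};

static bool page_trans_compound_map(const struct page *page)
{
        if (page->anon)
                /* anon THP: any PTE mapping raises _mapcount above -1 */
                return page->mapcount < 0;
        /* file THP: PMD-mapped only iff subpage and compound counts match */
        return page->mapcount == page->head->compound_mapcount;
}

int main(void)
{
        struct page head = { .anon = false, .compound_mapcount = -1 };
        struct page sub  = { .anon = false, .mapcount = -1, .head = &head };

        /* one PMD mapping bumps both counters */
        head.compound_mapcount++;
        sub.mapcount++;
        printf("PMD only:  %d\n", page_trans_compound_map(&sub)); /* 1 */

        /* an extra PTE mapping bumps only the subpage counter */
        sub.mapcount++;
        printf("PMD + PTE: %d\n", page_trans_compound_map(&sub)); /* 0 */
        return 0;
}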

lib/dump_stack.c
Lines changed: 6 additions & 1 deletion

@@ -106,7 +106,12 @@ asmlinkage __visible void dump_stack(void)
                 was_locked = 1;
         } else {
                 local_irq_restore(flags);
-                cpu_relax();
+                /*
+                 * Wait for the lock to release before jumping to
+                 * atomic_cmpxchg() in order to mitigate the thundering herd
+                 * problem.
+                 */
+                do { cpu_relax(); } while (atomic_read(&dump_lock) != -1);
                 goto retry;
         }

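The livelock came from every waiter re-running atomic_cmpxchg() in a tight loop, and the resulting stream of atomic read-modify-writes could starve the lock holder. The new loop is the classic test-and-test-and-set pattern: spin on plain reads until the lock looks free, and only then retry the atomic operation. A self-contained C11 sketch of the same pattern, with sched_yield() standing in for cpu_relax():

#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int dump_lock = -1; /* -1 means unlocked, else holder id */

static void lock(int me)
{
        for (;;) {
                int expected = -1;
                if (atomic_compare_exchange_strong(&dump_lock, &expected, me))
                        return; /* acquired */
                /* spin on plain reads until release, then retry the cmpxchg */
                while (atomic_load(&dump_lock) != -1)
                        sched_yield(); /* stands in for cpu_relax() */
        }
}

static void unlock(void)
{
        atomic_store(&dump_lock, -1);
}

int main(void)
{
        lock(0);
        puts("holding dump_lock");
        unlock();
        return 0;
}
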
mm/khugepaged.c
Lines changed: 4 additions & 3 deletions

@@ -1028,12 +1028,13 @@ static void collapse_huge_page(struct mm_struct *mm,
 
         anon_vma_lock_write(vma->anon_vma);
 
-        pte = pte_offset_map(pmd, address);
-        pte_ptl = pte_lockptr(mm, pmd);
-
         mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm,
                                 address, address + HPAGE_PMD_SIZE);
         mmu_notifier_invalidate_range_start(&range);
+
+        pte = pte_offset_map(pmd, address);
+        pte_ptl = pte_lockptr(mm, pmd);
+
         pmd_ptl = pmd_lock(mm, pmd); /* probably unnecessary */
         /*
          * After this gup_fast can't run anymore. This also removes
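With CONFIG_HIGHPTE=y, pte_offset_map() maps the PTE page with kmap_atomic(), which disables preemption, while mmu_notifier_invalidate_range_start() may sleep; the fix is purely the ordering change above. A toy model of the might_sleep() assertion that would catch the old ordering; all names here are stand-ins for the kernel primitives, not the real functions:

#include <assert.h>
#include <stdio.h>

static int in_atomic; /* models preemption disabled by kmap_atomic() */

static void might_sleep(void) { assert(!in_atomic); }

static void mmu_notifier_invalidate_range_start(void) { might_sleep(); }
static void pte_offset_map(void) { in_atomic = 1; /* kmap_atomic() */ }
static void pte_unmap(void) { in_atomic = 0; /* kunmap_atomic() */ }

int main(void)
{
        /* fixed order: the sleepable notifier call runs first ... */
        mmu_notifier_invalidate_range_start();
        /* ... and only then is the atomic PTE mapping taken */
        pte_offset_map();
        pte_unmap();
        puts("ordering ok");
        return 0;
}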

mm/memcontrol.c
Lines changed: 16 additions & 7 deletions

@@ -484,7 +484,7 @@ ino_t page_cgroup_ino(struct page *page)
         unsigned long ino = 0;
 
         rcu_read_lock();
-        if (PageHead(page) && PageSlab(page))
+        if (PageSlab(page) && !PageTail(page))
                 memcg = memcg_from_slab_page(page);
         else
                 memcg = READ_ONCE(page->mem_cgroup);
@@ -2534,6 +2534,15 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
                 goto retry;
         }
 
+        /*
+         * Memcg doesn't have a dedicated reserve for atomic
+         * allocations. But like the global atomic pool, we need to
+         * put the burden of reclaim on regular allocation requests
+         * and let these go through as privileged allocations.
+         */
+        if (gfp_mask & __GFP_ATOMIC)
+                goto force;
+
         /*
          * Unlike in global OOM situations, memcg is not in a physical
          * memory shortage. Allow dying and OOM-killed tasks to
@@ -5014,12 +5023,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 {
         int node;
 
-        /*
-         * Flush percpu vmstats and vmevents to guarantee the value correctness
-         * on parent's and all ancestor levels.
-         */
-        memcg_flush_percpu_vmstats(memcg, false);
-        memcg_flush_percpu_vmevents(memcg);
         for_each_node(node)
                 free_mem_cgroup_per_node_info(memcg, node);
         free_percpu(memcg->vmstats_percpu);
@@ -5030,6 +5033,12 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 static void mem_cgroup_free(struct mem_cgroup *memcg)
 {
         memcg_wb_domain_exit(memcg);
+        /*
+         * Flush percpu vmstats and vmevents to guarantee the value correctness
+         * on parent's and all ancestor levels.
+         */
+        memcg_flush_percpu_vmstats(memcg, false);
+        memcg_flush_percpu_vmevents(memcg);
         __mem_cgroup_free(memcg);
 }
 
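Three fixes land in this file. page_cgroup_ino() now recognizes slab pages allocated as non-compound high-order pages, where PageHead() is never set (PageSlab && !PageTail covers both the compound and non-compound cases). The percpu stats flush moves from __mem_cgroup_free() to mem_cgroup_free() so the allocation-failure path, which also calls __mem_cgroup_free(), cannot dereference not-yet-allocated percpu data. And try_charge() now lets over-limit __GFP_ATOMIC requests through rather than failing them, since atomic contexts cannot reclaim. A simplified model of that last decision; field and flag names are illustrative, not the memcg internals:

#include <stdio.h>

#define GFP_ATOMIC 0x1u /* illustrative flag, not the kernel's value */

struct counter { long usage, limit; };

static int try_charge(struct counter *c, unsigned int gfp, long pages)
{
        if (c->usage + pages <= c->limit) {
                c->usage += pages;
                return 0;
        }
        /* reclaim attempts, retries and OOM handling elided */
        if (gfp & GFP_ATOMIC) {
                /* force: privileged charge, allowed to overrun the limit */
                c->usage += pages;
                return 0;
        }
        return -1; /* stands in for -ENOMEM */
}

int main(void)
{
        struct counter memcg = { .usage = 95, .limit = 100 };

        printf("regular: %d\n", try_charge(&memcg, 0, 10));          /* -1 */
        printf("atomic:  %d\n", try_charge(&memcg, GFP_ATOMIC, 10)); /* 0 */
        printf("usage:   %ld / limit %ld\n", memcg.usage, memcg.limit);
        return 0;
}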

mm/memory_hotplug.c
Lines changed: 8 additions & 0 deletions

@@ -447,6 +447,14 @@ static void update_pgdat_span(struct pglist_data *pgdat)
                                              zone->spanned_pages;
 
                 /* No need to lock the zones, they can't change. */
+                if (!zone->spanned_pages)
+                        continue;
+                if (!node_end_pfn) {
+                        node_start_pfn = zone->zone_start_pfn;
+                        node_end_pfn = zone_end_pfn;
+                        continue;
+                }
+
                 if (zone_end_pfn > node_end_pfn)
                         node_end_pfn = zone_end_pfn;
                 if (zone->zone_start_pfn < node_start_pfn)
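update_pgdat_span() previously seeded node_start_pfn/node_end_pfn from the first zone even when that zone was empty, which could stretch the node span to include pfn 0 or stale offline ranges. The loop now skips empty zones and seeds the span from the first populated one. A standalone sketch of the corrected calculation, with simplified types rather than the kernel's pglist_data:

#include <stdio.h>

struct zone { unsigned long start_pfn, spanned_pages; };

static void update_span(struct zone *zones, int nr,
                        unsigned long *node_start, unsigned long *node_end)
{
        *node_start = *node_end = 0;
        for (int i = 0; i < nr; i++) {
                struct zone *z = &zones[i];
                unsigned long zone_end = z->start_pfn + z->spanned_pages;

                if (!z->spanned_pages)
                        continue;           /* empty zone: ignore */
                if (!*node_end) {           /* first populated zone seeds span */
                        *node_start = z->start_pfn;
                        *node_end = zone_end;
                        continue;
                }
                if (zone_end > *node_end)
                        *node_end = zone_end;
                if (z->start_pfn < *node_start)
                        *node_start = z->start_pfn;
        }
}

int main(void)
{
        struct zone zones[] = {
                { .start_pfn = 0, .spanned_pages = 0 },      /* empty zone */
                { .start_pfn = 4096, .spanned_pages = 8192 },
        };
        unsigned long start, end;

        update_span(zones, 2, &start, &end);
        printf("node span: %lu-%lu\n", start, end); /* 4096-12288, not 0-... */
        return 0;
}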

mm/mmu_notifier.c
Lines changed: 1 addition & 1 deletion

@@ -180,7 +180,7 @@ int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
                                 mn->ops->invalidate_range_start, _ret,
                                 !mmu_notifier_range_blockable(range) ? "non-" : "");
                         WARN_ON(mmu_notifier_range_blockable(range) ||
-                                ret != -EAGAIN);
+                                _ret != -EAGAIN);
                         ret = _ret;
                 }
         }
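This is a one-character fix: inside the notifier loop the WARN_ON() tested the accumulator ret instead of the per-callback _ret, so the warning keyed off a stale value from an earlier iteration (or the initial 0). A minimal illustration of the shadowed-accumulator pitfall:

#include <assert.h>
#include <errno.h>
#include <stdio.h>

int main(void)
{
        int results[] = { -EAGAIN, -EAGAIN }; /* per-callback return codes */
        int ret = 0;

        for (int i = 0; i < 2; i++) {
                int _ret = results[i];
                if (_ret) {
                        /* fixed check: test this iteration's _ret; `ret`
                         * still holds 0 (or an older value) at this point */
                        assert(_ret == -EAGAIN);
                        ret = _ret;
                }
        }
        printf("final ret = %d\n", ret);
        return 0;
}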
