Skip to content

Commit ddc1a5c

Browse files
Hugh Dickins authored
and akpm00 committed
mempolicy: alloc_pages_mpol() for NUMA policy without vma
Shrink shmem's stack usage by eliminating the pseudo-vma from its folio allocation. alloc_pages_mpol(gfp, order, pol, ilx, nid) becomes the principal actor for passing mempolicy choice down to __alloc_pages(), rather than vma_alloc_folio(gfp, order, vma, addr, hugepage). vma_alloc_folio() and alloc_pages() remain, but as wrappers around alloc_pages_mpol(). alloc_pages_bulk_*() untouched, except to provide the additional args to policy_nodemask(), which subsumes policy_node(). Cleanup throughout, cutting out some unhelpful "helpers". It would all be much simpler without MPOL_INTERLEAVE, but that adds a dynamic to the constant mpol: complicated by v3.6 commit 09c231c ("tmpfs: distribute interleave better across nodes"), which added ino bias to the interleave, hidden from mm/mempolicy.c until this commit. Hence "ilx" throughout, the "interleave index". Originally I thought it could be done just with nid, but that's wrong: the nodemask may come from the shared policy layer below a shmem vma, or it may come from the task layer above a shmem vma; and without the final nodemask then nodeid cannot be decided. And how ilx is applied depends also on page order. The interleave index is almost always irrelevant unless MPOL_INTERLEAVE: with one exception in alloc_pages_mpol(), where the NO_INTERLEAVE_INDEX passed down from vma-less alloc_pages() is also used as hint not to use THP-style hugepage allocation - to avoid the overhead of a hugepage arg (though I don't understand why we never just added a GFP bit for THP - if it actually needs a different allocation strategy from other pages of the same order). vma_alloc_folio() still carries its hugepage arg here, but it is not used, and should be removed when agreed. get_vma_policy() no longer allows a NULL vma: over time I believe we've eradicated all the places which used to need it e.g. swapoff and madvise used to pass NULL vma to read_swap_cache_async(), but now know the vma. 
[[email protected]: handle NULL mpol being passed to __read_swap_cache_async()] Link: https://lkml.kernel.org/r/[email protected] Link: https://lkml.kernel.org/r/[email protected] Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: Hugh Dickins <[email protected]> Cc: Andi Kleen <[email protected]> Cc: Christoph Lameter <[email protected]> Cc: David Hildenbrand <[email protected]> Cc: Greg Kroah-Hartman <[email protected]> Cc: Huang Ying <[email protected]> Cc: Kefeng Wang <[email protected]> Cc: Matthew Wilcox (Oracle) <[email protected]> Cc: Mel Gorman <[email protected]> Cc: Michal Hocko <[email protected]> Cc: Mike Kravetz <[email protected]> Cc: Nhat Pham <[email protected]> Cc: Sidhartha Kumar <[email protected]> Cc: Suren Baghdasaryan <[email protected]> Cc: Tejun Heo <[email protected]> Cc: Vishal Moola (Oracle) <[email protected]> Cc: Yang Shi <[email protected]> Cc: Yosry Ahmed <[email protected]> Cc: Domenico Cerasuolo <[email protected]> Cc: Johannes Weiner <[email protected]> Signed-off-by: Andrew Morton <[email protected]>
1 parent 23e4883 commit ddc1a5c

File tree

10 files changed

+308
-323
lines changed

10 files changed

+308
-323
lines changed

fs/proc/task_mmu.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2673,8 +2673,9 @@ static int show_numa_map(struct seq_file *m, void *v)
26732673
struct numa_maps *md = &numa_priv->md;
26742674
struct file *file = vma->vm_file;
26752675
struct mm_struct *mm = vma->vm_mm;
2676-
struct mempolicy *pol;
26772676
char buffer[64];
2677+
struct mempolicy *pol;
2678+
pgoff_t ilx;
26782679
int nid;
26792680

26802681
if (!mm)
@@ -2683,7 +2684,7 @@ static int show_numa_map(struct seq_file *m, void *v)
26832684
/* Ensure we start with an empty set of numa_maps statistics. */
26842685
memset(md, 0, sizeof(*md));
26852686

2686-
pol = __get_vma_policy(vma, vma->vm_start);
2687+
pol = __get_vma_policy(vma, vma->vm_start, &ilx);
26872688
if (pol) {
26882689
mpol_to_str(buffer, sizeof(buffer), pol);
26892690
mpol_cond_put(pol);

include/linux/gfp.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <linux/topology.h>
99

1010
struct vm_area_struct;
11+
struct mempolicy;
1112

1213
/* Convert GFP flags to their corresponding migrate type */
1314
#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
@@ -262,14 +263,21 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
262263

263264
#ifdef CONFIG_NUMA
264265
struct page *alloc_pages(gfp_t gfp, unsigned int order);
265-
struct folio *folio_alloc(gfp_t gfp, unsigned order);
266+
struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
267+
struct mempolicy *mpol, pgoff_t ilx, int nid);
268+
struct folio *folio_alloc(gfp_t gfp, unsigned int order);
266269
struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma,
267270
unsigned long addr, bool hugepage);
268271
#else
269272
static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order)
270273
{
271274
return alloc_pages_node(numa_node_id(), gfp_mask, order);
272275
}
276+
static inline struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
277+
struct mempolicy *mpol, pgoff_t ilx, int nid)
278+
{
279+
return alloc_pages(gfp, order);
280+
}
273281
static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order)
274282
{
275283
return __folio_alloc_node(gfp, order, numa_node_id());

include/linux/mempolicy.h

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
struct mm_struct;
1919

20+
#define NO_INTERLEAVE_INDEX (-1UL) /* use task il_prev for interleaving */
21+
2022
#ifdef CONFIG_NUMA
2123

2224
/*
@@ -126,7 +128,9 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
126128

127129
struct mempolicy *get_task_policy(struct task_struct *p);
128130
struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
129-
unsigned long addr);
131+
unsigned long addr, pgoff_t *ilx);
132+
struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
133+
unsigned long addr, int order, pgoff_t *ilx);
130134
bool vma_policy_mof(struct vm_area_struct *vma);
131135

132136
extern void numa_default_policy(void);
@@ -140,8 +144,6 @@ extern int huge_node(struct vm_area_struct *vma,
140144
extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
141145
extern bool mempolicy_in_oom_domain(struct task_struct *tsk,
142146
const nodemask_t *mask);
143-
extern nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy);
144-
145147
extern unsigned int mempolicy_slab_node(void);
146148

147149
extern enum zone_type policy_zone;
@@ -179,6 +181,11 @@ extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone);
179181

180182
struct mempolicy {};
181183

184+
static inline struct mempolicy *get_task_policy(struct task_struct *p)
185+
{
186+
return NULL;
187+
}
188+
182189
static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b)
183190
{
184191
return true;
@@ -213,6 +220,13 @@ mpol_shared_policy_lookup(struct shared_policy *sp, pgoff_t idx)
213220
return NULL;
214221
}
215222

223+
static inline struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
224+
unsigned long addr, int order, pgoff_t *ilx)
225+
{
226+
*ilx = 0;
227+
return NULL;
228+
}
229+
216230
static inline int
217231
vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
218232
{

include/linux/mm.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -619,7 +619,7 @@ struct vm_operations_struct {
619619
* policy.
620620
*/
621621
struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
622-
unsigned long addr);
622+
unsigned long addr, pgoff_t *ilx);
623623
#endif
624624
/*
625625
* Called by vm_normal_page() for special PTEs to find the

ipc/shm.c

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -562,30 +562,25 @@ static unsigned long shm_pagesize(struct vm_area_struct *vma)
562562
}
563563

564564
#ifdef CONFIG_NUMA
565-
static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
565+
static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
566566
{
567-
struct file *file = vma->vm_file;
568-
struct shm_file_data *sfd = shm_file_data(file);
567+
struct shm_file_data *sfd = shm_file_data(vma->vm_file);
569568
int err = 0;
570569

571570
if (sfd->vm_ops->set_policy)
572-
err = sfd->vm_ops->set_policy(vma, new);
571+
err = sfd->vm_ops->set_policy(vma, mpol);
573572
return err;
574573
}
575574

576575
static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
577-
unsigned long addr)
576+
unsigned long addr, pgoff_t *ilx)
578577
{
579-
struct file *file = vma->vm_file;
580-
struct shm_file_data *sfd = shm_file_data(file);
581-
struct mempolicy *pol = NULL;
578+
struct shm_file_data *sfd = shm_file_data(vma->vm_file);
579+
struct mempolicy *mpol = vma->vm_policy;
582580

583581
if (sfd->vm_ops->get_policy)
584-
pol = sfd->vm_ops->get_policy(vma, addr);
585-
else if (vma->vm_policy)
586-
pol = vma->vm_policy;
587-
588-
return pol;
582+
mpol = sfd->vm_ops->get_policy(vma, addr, ilx);
583+
return mpol;
589584
}
590585
#endif
591586

0 commit comments

Comments (0)