Skip to content

Commit 4f66170

Browse files
author
Matthew Wilcox (Oracle)
committed
filemap: Allow __filemap_get_folio to allocate large folios
Allow callers of __filemap_get_folio() to specify a preferred folio order in the FGP flags. This is only honoured in the FGP_CREATE path; if there is already a folio in the page cache that covers the index, we will return it, no matter what its order is. No create-around is attempted; we will only create folios which start at the specified index. Unmodified callers will continue to allocate order 0 folios. Signed-off-by: Matthew Wilcox (Oracle) <[email protected]> Reviewed-by: Christoph Hellwig <[email protected]> Reviewed-by: Darrick J. Wong <[email protected]>
1 parent ffc143d commit 4f66170

File tree

3 files changed

+67
-26
lines changed

3 files changed

+67
-26
lines changed

include/linux/pagemap.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,19 @@ static inline void *detach_page_private(struct page *page)
 	return folio_detach_private(page_folio(page));
 }

+/*
+ * There are some parts of the kernel which assume that PMD entries
+ * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
+ * limit the maximum allocation order to PMD size.  I'm not aware of any
+ * assumptions about maximum order if THP are disabled, but 8 seems like
+ * a good order (that's 1MB if you're using 4kB pages)
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
+#else
+#define MAX_PAGECACHE_ORDER	8
+#endif
+
 #ifdef CONFIG_NUMA
 struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order);
 #else
@@ -535,9 +548,30 @@ typedef unsigned int __bitwise fgf_t;
 #define FGP_NOWAIT		((__force fgf_t)0x00000020)
 #define FGP_FOR_MMAP		((__force fgf_t)0x00000040)
 #define FGP_STABLE		((__force fgf_t)0x00000080)
+#define FGF_GET_ORDER(fgf)	(((__force unsigned)fgf) >> 26)	/* top 6 bits */

 #define FGP_WRITEBEGIN		(FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)

+/**
+ * fgf_set_order - Encode a length in the fgf_t flags.
+ * @size: The suggested size of the folio to create.
+ *
+ * The caller of __filemap_get_folio() can use this to suggest a preferred
+ * size for the folio that is created.  If there is already a folio at
+ * the index, it will be returned, no matter what its size.  If a folio
+ * is freshly created, it may be of a different size than requested
+ * due to alignment constraints, memory pressure, or the presence of
+ * other folios at nearby indices.
+ */
+static inline fgf_t fgf_set_order(size_t size)
+{
+	unsigned int shift = ilog2(size);
+
+	if (shift <= PAGE_SHIFT)
+		return 0;
+	return (__force fgf_t)((shift - PAGE_SHIFT) << 26);
+}
+
 void *filemap_get_entry(struct address_space *mapping, pgoff_t index);
 struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 		fgf_t fgp_flags, gfp_t gfp);

mm/filemap.c

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1905,7 +1905,9 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 			folio_wait_stable(folio);
 no_page:
 	if (!folio && (fgp_flags & FGP_CREAT)) {
+		unsigned order = FGF_GET_ORDER(fgp_flags);
 		int err;
+
 		if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
 			gfp |= __GFP_WRITE;
 		if (fgp_flags & FGP_NOFS)
@@ -1914,26 +1916,44 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 			gfp &= ~GFP_KERNEL;
 			gfp |= GFP_NOWAIT | __GFP_NOWARN;
 		}
-
-		folio = filemap_alloc_folio(gfp, 0);
-		if (!folio)
-			return ERR_PTR(-ENOMEM);
-
 		if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
 			fgp_flags |= FGP_LOCK;

-		/* Init accessed so avoid atomic mark_page_accessed later */
-		if (fgp_flags & FGP_ACCESSED)
-			__folio_set_referenced(folio);
+		if (!mapping_large_folio_support(mapping))
+			order = 0;
+		if (order > MAX_PAGECACHE_ORDER)
+			order = MAX_PAGECACHE_ORDER;
+		/* If we're not aligned, allocate a smaller folio */
+		if (index & ((1UL << order) - 1))
+			order = __ffs(index);

-		err = filemap_add_folio(mapping, folio, index, gfp);
-		if (unlikely(err)) {
+		do {
+			gfp_t alloc_gfp = gfp;
+
+			err = -ENOMEM;
+			if (order == 1)
+				order = 0;
+			if (order > 0)
+				alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
+			folio = filemap_alloc_folio(alloc_gfp, order);
+			if (!folio)
+				continue;
+
+			/* Init accessed so avoid atomic mark_page_accessed later */
+			if (fgp_flags & FGP_ACCESSED)
+				__folio_set_referenced(folio);
+
+			err = filemap_add_folio(mapping, folio, index, gfp);
+			if (!err)
+				break;
 			folio_put(folio);
 			folio = NULL;
-			if (err == -EEXIST)
-				goto repeat;
-		}
+		} while (order-- > 0);

+		if (err == -EEXIST)
+			goto repeat;
+		if (err)
+			return ERR_PTR(err);
 		/*
 		 * filemap_add_folio locks the page, and for mmap
 		 * we expect an unlocked page.

mm/readahead.c

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -461,19 +461,6 @@ static int try_context_readahead(struct address_space *mapping,
 	return 1;
 }

-/*
- * There are some parts of the kernel which assume that PMD entries
- * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
- * limit the maximum allocation order to PMD size.  I'm not aware of any
- * assumptions about maximum order if THP are disabled, but 8 seems like
- * a good order (that's 1MB if you're using 4kB pages)
- */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
-#else
-#define MAX_PAGECACHE_ORDER	8
-#endif
-
 static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
 		pgoff_t mark, unsigned int order, gfp_t gfp)
 {

0 commit comments

Comments
 (0)