Skip to content

Commit fd39073

Browse files
committed
fs-verity: implement readahead of Merkle tree pages
When fs-verity verifies data pages, currently it reads each Merkle tree page synchronously using read_mapping_page(). Therefore, when the Merkle tree pages aren't already cached, fs-verity causes an extra 4 KiB I/O request for every 512 KiB of data (assuming that the Merkle tree uses SHA-256 and 4 KiB blocks). This results in more I/O requests and performance loss than is strictly necessary.

Therefore, implement readahead of the Merkle tree pages.

For simplicity, we take advantage of the fact that the kernel already does readahead of the file's *data*, just like it does for any other file. Due to this, we don't really need a separate readahead state (struct file_ra_state) just for the Merkle tree, but rather we just need to piggy-back on the existing data readahead requests.

We also only really need to bother with the first level of the Merkle tree, since the usual fan-out factor is 128, so normally over 99% of Merkle tree I/O requests are for the first level.

Therefore, make fsverity_verify_bio() enable readahead of the first Merkle tree level, for up to 1/4 the number of pages in the bio, when it sees that the REQ_RAHEAD flag is set on the bio. The readahead size is then passed down to ->read_merkle_tree_page() for the filesystem to (optionally) implement if it sees that the requested page is uncached.

While we're at it, also make build_merkle_tree_level() set the Merkle tree readahead size, since it's easy to do there.

However, for now don't set the readahead size in fsverity_verify_page(), since currently it's only used to verify holes on ext4 and f2fs, and it would need parameters added to know how much to read ahead.

This patch significantly improves fs-verity sequential read performance. Some quick benchmarks with 'cat'-ing a 250MB file after dropping caches:

On an ARM64 phone (using sha256-ce):
    Before: 217 MB/s
    After: 263 MB/s
    (compare to sha256sum of non-verity file: 357 MB/s)

In an x86_64 VM (using sha256-avx2):
    Before: 173 MB/s
    After: 215 MB/s
    (compare to sha256sum of non-verity file: 223 MB/s)

Link: https://lore.kernel.org/r/[email protected]
Reviewed-by: Theodore Ts'o <[email protected]>
Signed-off-by: Eric Biggers <[email protected]>
1 parent c22415d commit fd39073

File tree

9 files changed: +137 additions, -13 deletions

fs/ext4/verity.c

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -342,12 +342,55 @@ static int ext4_get_verity_descriptor(struct inode *inode, void *buf,
342342
return desc_size;
343343
}
344344

345+
/*
346+
* Prefetch some pages from the file's Merkle tree.
347+
*
348+
* This is basically a stripped-down version of __do_page_cache_readahead()
349+
* which works on pages past i_size.
350+
*/
351+
static void ext4_merkle_tree_readahead(struct address_space *mapping,
352+
pgoff_t start_index, unsigned long count)
353+
{
354+
LIST_HEAD(pages);
355+
unsigned int nr_pages = 0;
356+
struct page *page;
357+
pgoff_t index;
358+
struct blk_plug plug;
359+
360+
for (index = start_index; index < start_index + count; index++) {
361+
page = xa_load(&mapping->i_pages, index);
362+
if (!page || xa_is_value(page)) {
363+
page = __page_cache_alloc(readahead_gfp_mask(mapping));
364+
if (!page)
365+
break;
366+
page->index = index;
367+
list_add(&page->lru, &pages);
368+
nr_pages++;
369+
}
370+
}
371+
blk_start_plug(&plug);
372+
ext4_mpage_readpages(mapping, &pages, NULL, nr_pages, true);
373+
blk_finish_plug(&plug);
374+
}
375+
345376
static struct page *ext4_read_merkle_tree_page(struct inode *inode,
346-
pgoff_t index)
377+
pgoff_t index,
378+
unsigned long num_ra_pages)
347379
{
380+
struct page *page;
381+
348382
index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;
349383

350-
return read_mapping_page(inode->i_mapping, index, NULL);
384+
page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
385+
if (!page || !PageUptodate(page)) {
386+
if (page)
387+
put_page(page);
388+
else if (num_ra_pages > 1)
389+
ext4_merkle_tree_readahead(inode->i_mapping, index,
390+
num_ra_pages);
391+
page = read_mapping_page(inode->i_mapping, index, NULL);
392+
}
393+
return page;
351394
}
352395

353396
static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf,

fs/f2fs/data.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1881,7 +1881,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
18811881
* use ->readpage() or do the necessary surgery to decouple ->readpages()
18821882
* from read-ahead.
18831883
*/
1884-
static int f2fs_mpage_readpages(struct address_space *mapping,
1884+
int f2fs_mpage_readpages(struct address_space *mapping,
18851885
struct list_head *pages, struct page *page,
18861886
unsigned nr_pages, bool is_readahead)
18871887
{

fs/f2fs/f2fs.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3229,6 +3229,9 @@ int f2fs_reserve_new_block(struct dnode_of_data *dn);
32293229
int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
32303230
int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from);
32313231
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
3232+
int f2fs_mpage_readpages(struct address_space *mapping,
3233+
struct list_head *pages, struct page *page,
3234+
unsigned nr_pages, bool is_readahead);
32323235
struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
32333236
int op_flags, bool for_write);
32343237
struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index);

fs/f2fs/verity.c

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,12 +222,55 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf,
222222
return size;
223223
}
224224

225+
/*
226+
* Prefetch some pages from the file's Merkle tree.
227+
*
228+
* This is basically a stripped-down version of __do_page_cache_readahead()
229+
* which works on pages past i_size.
230+
*/
231+
static void f2fs_merkle_tree_readahead(struct address_space *mapping,
232+
pgoff_t start_index, unsigned long count)
233+
{
234+
LIST_HEAD(pages);
235+
unsigned int nr_pages = 0;
236+
struct page *page;
237+
pgoff_t index;
238+
struct blk_plug plug;
239+
240+
for (index = start_index; index < start_index + count; index++) {
241+
page = xa_load(&mapping->i_pages, index);
242+
if (!page || xa_is_value(page)) {
243+
page = __page_cache_alloc(readahead_gfp_mask(mapping));
244+
if (!page)
245+
break;
246+
page->index = index;
247+
list_add(&page->lru, &pages);
248+
nr_pages++;
249+
}
250+
}
251+
blk_start_plug(&plug);
252+
f2fs_mpage_readpages(mapping, &pages, NULL, nr_pages, true);
253+
blk_finish_plug(&plug);
254+
}
255+
225256
static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
226-
pgoff_t index)
257+
pgoff_t index,
258+
unsigned long num_ra_pages)
227259
{
260+
struct page *page;
261+
228262
index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;
229263

230-
return read_mapping_page(inode->i_mapping, index, NULL);
264+
page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
265+
if (!page || !PageUptodate(page)) {
266+
if (page)
267+
put_page(page);
268+
else if (num_ra_pages > 1)
269+
f2fs_merkle_tree_readahead(inode->i_mapping, index,
270+
num_ra_pages);
271+
page = read_mapping_page(inode->i_mapping, index, NULL);
272+
}
273+
return page;
231274
}
232275

233276
static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf,

fs/verity/enable.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "fsverity_private.h"
99

1010
#include <crypto/hash.h>
11+
#include <linux/backing-dev.h>
1112
#include <linux/mount.h>
1213
#include <linux/pagemap.h>
1314
#include <linux/sched/signal.h>
@@ -86,9 +87,14 @@ static int build_merkle_tree_level(struct file *filp, unsigned int level,
8687
return err;
8788
}
8889
} else {
90+
unsigned long num_ra_pages =
91+
min_t(unsigned long, num_blocks_to_hash - i,
92+
inode->i_sb->s_bdi->io_pages);
93+
8994
/* Non-leaf: hashing hash block from level below */
9095
src_page = vops->read_merkle_tree_page(inode,
91-
params->level_start[level - 1] + i);
96+
params->level_start[level - 1] + i,
97+
num_ra_pages);
9298
if (IS_ERR(src_page)) {
9399
err = PTR_ERR(src_page);
94100
fsverity_err(inode,

fs/verity/fsverity_private.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ struct merkle_tree_params {
5050
unsigned int log_arity; /* log2(hashes_per_block) */
5151
unsigned int num_levels; /* number of levels in Merkle tree */
5252
u64 tree_size; /* Merkle tree size in bytes */
53+
unsigned long level0_blocks; /* number of blocks in tree level 0 */
5354

5455
/*
5556
* Starting block index for each tree level, ordered from leaf level (0)

fs/verity/open.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
102102
/* temporarily using level_start[] to store blocks in level */
103103
params->level_start[params->num_levels++] = blocks;
104104
}
105+
params->level0_blocks = params->level_start[0];
105106

106107
/* Compute the starting block of each level */
107108
offset = 0;

fs/verity/verify.c

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,8 @@ static inline int cmp_hashes(const struct fsverity_info *vi,
8484
* Return: true if the page is valid, else false.
8585
*/
8686
static bool verify_page(struct inode *inode, const struct fsverity_info *vi,
87-
struct ahash_request *req, struct page *data_page)
87+
struct ahash_request *req, struct page *data_page,
88+
unsigned long level0_ra_pages)
8889
{
8990
const struct merkle_tree_params *params = &vi->tree_params;
9091
const unsigned int hsize = params->digest_size;
@@ -117,8 +118,8 @@ static bool verify_page(struct inode *inode, const struct fsverity_info *vi,
117118
pr_debug_ratelimited("Level %d: hindex=%lu, hoffset=%u\n",
118119
level, hindex, hoffset);
119120

120-
hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode,
121-
hindex);
121+
hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode, hindex,
122+
level == 0 ? level0_ra_pages : 0);
122123
if (IS_ERR(hpage)) {
123124
err = PTR_ERR(hpage);
124125
fsverity_err(inode,
@@ -195,7 +196,7 @@ bool fsverity_verify_page(struct page *page)
195196
if (unlikely(!req))
196197
return false;
197198

198-
valid = verify_page(inode, vi, req, page);
199+
valid = verify_page(inode, vi, req, page, 0);
199200

200201
ahash_request_free(req);
201202

@@ -222,21 +223,42 @@ void fsverity_verify_bio(struct bio *bio)
222223
{
223224
struct inode *inode = bio_first_page_all(bio)->mapping->host;
224225
const struct fsverity_info *vi = inode->i_verity_info;
226+
const struct merkle_tree_params *params = &vi->tree_params;
225227
struct ahash_request *req;
226228
struct bio_vec *bv;
227229
struct bvec_iter_all iter_all;
230+
unsigned long max_ra_pages = 0;
228231

229-
req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS);
232+
req = ahash_request_alloc(params->hash_alg->tfm, GFP_NOFS);
230233
if (unlikely(!req)) {
231234
bio_for_each_segment_all(bv, bio, iter_all)
232235
SetPageError(bv->bv_page);
233236
return;
234237
}
235238

239+
if (bio->bi_opf & REQ_RAHEAD) {
240+
/*
241+
* If this bio is for data readahead, then we also do readahead
242+
* of the first (largest) level of the Merkle tree. Namely,
243+
* when a Merkle tree page is read, we also try to piggy-back on
244+
* some additional pages -- up to 1/4 the number of data pages.
245+
*
246+
* This improves sequential read performance, as it greatly
247+
* reduces the number of I/O requests made to the Merkle tree.
248+
*/
249+
bio_for_each_segment_all(bv, bio, iter_all)
250+
max_ra_pages++;
251+
max_ra_pages /= 4;
252+
}
253+
236254
bio_for_each_segment_all(bv, bio, iter_all) {
237255
struct page *page = bv->bv_page;
256+
unsigned long level0_index = page->index >> params->log_arity;
257+
unsigned long level0_ra_pages =
258+
min(max_ra_pages, params->level0_blocks - level0_index);
238259

239-
if (!PageError(page) && !verify_page(inode, vi, req, page))
260+
if (!PageError(page) &&
261+
!verify_page(inode, vi, req, page, level0_ra_pages))
240262
SetPageError(page);
241263
}
242264

include/linux/fsverity.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@ struct fsverity_operations {
7777
*
7878
* @inode: the inode
7979
* @index: 0-based index of the page within the Merkle tree
80+
* @num_ra_pages: The number of Merkle tree pages that should be
81+
* prefetched starting at @index if the page at @index
82+
* isn't already cached. Implementations may ignore this
83+
* argument; it's only a performance optimization.
8084
*
8185
* This can be called at any time on an open verity file, as well as
8286
* between ->begin_enable_verity() and ->end_enable_verity(). It may be
@@ -87,7 +91,8 @@ struct fsverity_operations {
8791
* Return: the page on success, ERR_PTR() on failure
8892
*/
8993
struct page *(*read_merkle_tree_page)(struct inode *inode,
90-
pgoff_t index);
94+
pgoff_t index,
95+
unsigned long num_ra_pages);
9196

9297
/**
9398
* Write a Merkle tree block to the given inode.

0 commit comments

Comments
 (0)