Commit 9e2f9d3

erofs: handle overlapped pclusters out of crafted images properly
syzbot reported a task hang issue due to a deadlock case where it is
waiting for the folio lock of a cached folio that will be used for
cache I/Os.

After looking into the crafted fuzzed image, I found it's formed with
several overlapped big pclusters as below:

 Ext:   logical offset   |  length :     physical offset    |  length
   0:        0..   16384 |   16384 :     151552..    167936 |   16384
   1:    16384..   32768 |   16384 :     155648..    172032 |   16384
   2:    32768..   49152 |   16384 :  537223168.. 537239552 |   16384
 ...

Here, extent 0/1 are physically overlapped although it's entirely
_impossible_ for normal filesystem images generated by mkfs.

First, managed folios containing compressed data will be marked as
up-to-date and then unlocked immediately (unlike in-place folios) when
compressed I/Os are complete.  If physical blocks are not submitted in
incremental order, there should be separate BIOs to avoid dependency
issues.  However, the current code mis-arranges z_erofs_fill_bio_vec()
and BIO submission, which causes unexpected BIO waits.

Second, managed folios will be connected to their own pclusters for
efficient inter-queries.  However, this is somewhat hard to implement
easily if overlapped big pclusters exist.  Again, these only appear in
fuzzed images, so let's simply fall back to temporary short-lived pages
for correctness.

Additionally, it justifies that referenced managed folios cannot be
truncated for now and reverts part of commit 2080ca1 ("erofs: tidy up
`struct z_erofs_bvec`") for simplicity, although it shouldn't make any
difference.

Reported-by: [email protected]
Reported-by: [email protected]
Reported-by: [email protected]
Tested-by: [email protected]
Closes: https://lore.kernel.org/r/[email protected]
Fixes: 8e6c8fa ("erofs: enable big pcluster feature")
Signed-off-by: Gao Xiang <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
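To make the overlapping condition concrete, below is a minimal user-space C
sketch (illustration only, not part of the kernel change; struct extent,
physically_overlapped() and the sample driver are made up for this example)
that flags decoded extents whose physical ranges intersect, fed with the
three extents listed above:

/*
 * Minimal user-space sketch: flag physically overlapped pclusters in a
 * decoded extent list.  The values come from the fuzzed image above;
 * everything else here is illustrative, not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

struct extent {
        unsigned long long lofs, llen;  /* logical offset, length (bytes) */
        unsigned long long pofs, plen;  /* physical offset, length (bytes) */
};

/* do two half-open physical ranges [pofs, pofs + plen) intersect? */
static bool physically_overlapped(const struct extent *a,
                                  const struct extent *b)
{
        return a->pofs < b->pofs + b->plen && b->pofs < a->pofs + a->plen;
}

int main(void)
{
        /* extents 0..2 of the fuzzed image from the commit message */
        static const struct extent ext[] = {
                {     0, 16384,    151552, 16384 },
                { 16384, 16384,    155648, 16384 },
                { 32768, 16384, 537223168, 16384 },
        };
        const int n = sizeof(ext) / sizeof(ext[0]);

        for (int i = 0; i < n; i++)
                for (int j = i + 1; j < n; j++)
                        if (physically_overlapped(&ext[i], &ext[j]))
                                printf("extents %d and %d overlap physically\n",
                                       i, j);
        return 0;
}

On these values it reports that extents 0 and 1 share physical blocks, which
mkfs-generated images never produce; that is why the fix simply falls back to
temporary short-lived pages rather than connecting one managed folio to a
second pcluster.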
1 parent 3fc3e45 commit 9e2f9d3

File tree: 1 file changed, 38 additions and 33 deletions


fs/erofs/zdata.c

Lines changed: 38 additions & 33 deletions
@@ -1428,6 +1428,7 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
         struct z_erofs_bvec zbv;
         struct address_space *mapping;
         struct folio *folio;
+        struct page *page;
         int bs = i_blocksize(f->inode);
 
         /* Except for inplace folios, the entire folio can be used for I/Os */
@@ -1450,7 +1451,6 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
          * file-backed folios will be used instead.
          */
         if (folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
-                folio->private = 0;
                 tocache = true;
                 goto out_tocache;
         }
@@ -1468,7 +1468,7 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
         }
 
         folio_lock(folio);
-        if (folio->mapping == mc) {
+        if (likely(folio->mapping == mc)) {
                 /*
                  * The cached folio is still in managed cache but without
                  * a valid `->private` pcluster hint. Let's reconnect them.
@@ -1478,41 +1478,44 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
                         /* compressed_bvecs[] already takes a ref before */
                         folio_put(folio);
                 }
-
-                /* no need to submit if it is already up-to-date */
-                if (folio_test_uptodate(folio)) {
-                        folio_unlock(folio);
-                        bvec->bv_page = NULL;
+                if (likely(folio->private == pcl)) {
+                        /* don't submit cache I/Os again if already uptodate */
+                        if (folio_test_uptodate(folio)) {
+                                folio_unlock(folio);
+                                bvec->bv_page = NULL;
+                        }
+                        return;
                 }
-                return;
+                /*
+                 * Already linked with another pcluster, which only appears in
+                 * crafted images by fuzzers for now. But handle this anyway.
+                 */
+                tocache = false;        /* use temporary short-lived pages */
+        } else {
+                DBG_BUGON(1); /* referenced managed folios can't be truncated */
+                tocache = true;
         }
-
-        /*
-         * It has been truncated, so it's unsafe to reuse this one. Let's
-         * allocate a new page for compressed data.
-         */
-        DBG_BUGON(folio->mapping);
-        tocache = true;
         folio_unlock(folio);
         folio_put(folio);
 out_allocfolio:
-        zbv.page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
+        page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
         spin_lock(&pcl->obj.lockref.lock);
-        if (pcl->compressed_bvecs[nr].page) {
-                erofs_pagepool_add(&f->pagepool, zbv.page);
+        if (unlikely(pcl->compressed_bvecs[nr].page != zbv.page)) {
+                erofs_pagepool_add(&f->pagepool, page);
                 spin_unlock(&pcl->obj.lockref.lock);
                 cond_resched();
                 goto repeat;
         }
-        bvec->bv_page = pcl->compressed_bvecs[nr].page = zbv.page;
-        folio = page_folio(zbv.page);
-        /* first mark it as a temporary shortlived folio (now 1 ref) */
-        folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
+        bvec->bv_page = pcl->compressed_bvecs[nr].page = page;
+        folio = page_folio(page);
         spin_unlock(&pcl->obj.lockref.lock);
 out_tocache:
         if (!tocache || bs != PAGE_SIZE ||
-            filemap_add_folio(mc, folio, pcl->obj.index + nr, gfp))
+            filemap_add_folio(mc, folio, pcl->obj.index + nr, gfp)) {
+                /* turn into a temporary shortlived folio (1 ref) */
+                folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
                 return;
+        }
         folio_attach_private(folio, pcl);
         /* drop a refcount added by allocpage (then 2 refs in total here) */
         folio_put(folio);
@@ -1647,13 +1650,10 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
                 cur = mdev.m_pa;
                 end = cur + pcl->pclustersize;
                 do {
-                        z_erofs_fill_bio_vec(&bvec, f, pcl, i++, mc);
-                        if (!bvec.bv_page)
-                                continue;
-
+                        bvec.bv_page = NULL;
                         if (bio && (cur != last_pa ||
                                     bio->bi_bdev != mdev.m_bdev)) {
-io_retry:
+drain_io:
                                 if (!erofs_is_fscache_mode(sb))
                                         submit_bio(bio);
                                 else
@@ -1666,6 +1666,15 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
                                 bio = NULL;
                         }
 
+                        if (!bvec.bv_page) {
+                                z_erofs_fill_bio_vec(&bvec, f, pcl, i++, mc);
+                                if (!bvec.bv_page)
+                                        continue;
+                                if (cur + bvec.bv_len > end)
+                                        bvec.bv_len = end - cur;
+                                DBG_BUGON(bvec.bv_len < sb->s_blocksize);
+                        }
+
                         if (unlikely(PageWorkingset(bvec.bv_page)) &&
                             !memstall) {
                                 psi_memstall_enter(&pflags);
@@ -1685,13 +1694,9 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
                                 ++nr_bios;
                         }
 
-                        if (cur + bvec.bv_len > end)
-                                bvec.bv_len = end - cur;
-                        DBG_BUGON(bvec.bv_len < sb->s_blocksize);
                         if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len,
                                           bvec.bv_offset))
-                                goto io_retry;
-
+                                goto drain_io;
                         last_pa = cur + bvec.bv_len;
                         bypass = false;
                 } while ((cur += bvec.bv_len) < end);
