Commit 7361d1e

erofs: support unaligned encoded data
We're almost there. It's straightforward to adapt the current decompression subsystem to support unaligned encoded (compressed) data. Note that unaligned data is not encouraged, because of worse I/O and caching efficiency, unless the corresponding compressor doesn't natively support fixed-sized output compression (as with Zstd).

Signed-off-by: Gao Xiang <[email protected]>
Acked-by: Chao Yu <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 1d191b4
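For orientation (an illustration, not part of the commit): the commit replaces the block-aligned `index` field with a byte-granular `pos` plus an in-page offset `pageofs_in`, so a pcluster may start anywhere within a block. A minimal userspace sketch of the derived values, assuming 4 KiB pages and block size equal to page size; all values are hypothetical:

/* Userspace sketch of the new pcluster geometry (hypothetical values). */
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE (1ULL << PAGE_SHIFT)

int main(void)
{
        unsigned long long pos = 0x3a300;   /* physical start (pcl->pos) */
        unsigned int pclustersize = 12288;  /* encoded length (pcl->pclustersize) */
        unsigned int pageofs_in = pos & (PAGE_SIZE - 1); /* erofs_blkoff() analogue */

        printf("page cache index of first page: %llu\n", pos >> PAGE_SHIFT);
        printf("offset within first page: %u\n", pageofs_in);
        printf("pages spanned: %llu\n",
               (pageofs_in + pclustersize + PAGE_SIZE - 1) >> PAGE_SHIFT);
        return 0;
}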

2 files changed: 46 insertions(+), 48 deletions(-)

fs/erofs/decompressor.c

Lines changed: 1 addition & 1 deletion
@@ -313,7 +313,7 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
                 rq->outputsize -= cur;
         }
 
-        for (; rq->outputsize; rq->pageofs_in = 0, cur += PAGE_SIZE, ni++) {
+        for (; rq->outputsize; rq->pageofs_in = 0, cur += insz, ni++) {
                 insz = min(PAGE_SIZE - rq->pageofs_in, rq->outputsize);
                 rq->outputsize -= insz;
                 if (!rq->in[ni])
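Why this one-liner matters: once rq->pageofs_in can be nonzero, the first input page contributes only PAGE_SIZE - pageofs_in bytes, so advancing `cur` by a full PAGE_SIZE would misplace all output after the first page. A standalone sketch of the loop arithmetic (hypothetical values, not kernel code):

/* Sketch: advancing by insz keeps the output offset correct. */
#include <stdio.h>

#define PAGE_SIZE 4096u

int main(void)
{
        unsigned int pageofs_in = 768;    /* hypothetical unaligned offset */
        unsigned int outputsize = 10000;  /* hypothetical bytes to copy */
        unsigned int cur = 0, ni = 0;

        for (; outputsize; pageofs_in = 0, ni++) {
                unsigned int insz = PAGE_SIZE - pageofs_in;

                if (insz > outputsize)
                        insz = outputsize;
                outputsize -= insz;
                printf("page %u: %u bytes -> output offset %u\n", ni, insz, cur);
                cur += insz;  /* the fix: advance by insz, not PAGE_SIZE */
        }
        return 0;
}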

fs/erofs/zdata.c

Lines changed: 45 additions & 47 deletions
@@ -44,8 +44,8 @@ struct z_erofs_pcluster {
         /* A: point to next chained pcluster or TAILs */
         struct z_erofs_pcluster *next;
 
-        /* I: start block address of this pcluster */
-        erofs_off_t index;
+        /* I: start physical position of this pcluster */
+        erofs_off_t pos;
 
         /* L: the maximum decompression size of this round */
         unsigned int length;
@@ -73,6 +73,9 @@ struct z_erofs_pcluster {
         /* I: compression algorithm format */
         unsigned char algorithmformat;
 
+        /* I: whether compressed data is in-lined or not */
+        bool from_meta;
+
         /* L: whether partial decompression or not */
         bool partial;
 
@@ -102,14 +105,9 @@ struct z_erofs_decompressqueue {
         bool eio, sync;
 };
 
-static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl)
-{
-        return !pcl->index;
-}
-
 static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
 {
-        return PAGE_ALIGN(pcl->pclustersize) >> PAGE_SHIFT;
+        return PAGE_ALIGN(pcl->pageofs_in + pcl->pclustersize) >> PAGE_SHIFT;
 }
 
 static bool erofs_folio_is_managed(struct erofs_sb_info *sbi, struct folio *fo)
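The page count must now account for the leading in-page offset: an extent that spills past what its length alone suggests needs one extra backing page. A worked example of the old versus new formula, assuming 4 KiB pages and hypothetical values:

/* Sketch of the reworked page-count math above. */
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE (1u << PAGE_SHIFT)
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
        unsigned int pageofs_in = 2048, pclustersize = 8192;

        /* Old formula: 2 pages, which under-counts the spilled tail page. */
        printf("old: %u\n", PAGE_ALIGN(pclustersize) >> PAGE_SHIFT);
        /* New formula: 3 pages, covering the leading in-page offset too. */
        printf("new: %u\n", PAGE_ALIGN(pageofs_in + pclustersize) >> PAGE_SHIFT);
        return 0;
}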
@@ -133,7 +131,7 @@ struct z_erofs_pcluster_slab {
 
 static struct z_erofs_pcluster_slab pcluster_pool[] __read_mostly = {
         _PCLP(1), _PCLP(4), _PCLP(16), _PCLP(64), _PCLP(128),
-        _PCLP(Z_EROFS_PCLUSTER_MAX_PAGES)
+        _PCLP(Z_EROFS_PCLUSTER_MAX_PAGES + 1)
 };
 
 struct z_erofs_bvec_iter {
@@ -267,7 +265,6 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int size)
                 pcl = kmem_cache_zalloc(pcs->slab, GFP_KERNEL);
                 if (!pcl)
                         return ERR_PTR(-ENOMEM);
-                pcl->pclustersize = size;
                 return pcl;
         }
         return ERR_PTR(-EINVAL);
@@ -516,6 +513,7 @@ static void z_erofs_bind_cache(struct z_erofs_frontend *fe)
         struct z_erofs_pcluster *pcl = fe->pcl;
         unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
         bool shouldalloc = z_erofs_should_alloc_cache(fe);
+        pgoff_t poff = pcl->pos >> PAGE_SHIFT;
         bool may_bypass = true;
         /* Optimistic allocation, as in-place I/O can be used as a fallback */
         gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
@@ -532,7 +530,7 @@ static void z_erofs_bind_cache(struct z_erofs_frontend *fe)
                 if (READ_ONCE(pcl->compressed_bvecs[i].page))
                         continue;
 
-                folio = filemap_get_folio(mc, pcl->index + i);
+                folio = filemap_get_folio(mc, poff + i);
                 if (IS_ERR(folio)) {
                         may_bypass = false;
                         if (!shouldalloc)
@@ -575,7 +573,7 @@ static int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
         struct folio *folio;
         int i;
 
-        DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
+        DBG_BUGON(pcl->from_meta);
         /* Each cached folio contains one page unless bs > ps is supported */
         for (i = 0; i < pclusterpages; ++i) {
                 if (pcl->compressed_bvecs[i].page) {
@@ -607,7 +605,7 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
         ret = false;
         spin_lock(&pcl->lockref.lock);
         if (pcl->lockref.count <= 0) {
-                DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
+                DBG_BUGON(pcl->from_meta);
                 for (; bvec < end; ++bvec) {
                         if (bvec->page && page_folio(bvec->page) == folio) {
                                 bvec->page = NULL;
@@ -667,16 +665,20 @@ static int z_erofs_attach_page(struct z_erofs_frontend *fe,
         int ret;
 
         if (exclusive) {
-                /* give priority for inplaceio to use file pages first */
-                spin_lock(&pcl->lockref.lock);
-                while (fe->icur > 0) {
-                        if (pcl->compressed_bvecs[--fe->icur].page)
-                                continue;
-                        pcl->compressed_bvecs[fe->icur] = *bvec;
+                /* Inplace I/O is limited to one page for uncompressed data */
+                if (pcl->algorithmformat < Z_EROFS_COMPRESSION_MAX ||
+                    fe->icur <= 1) {
+                        /* Try to prioritize inplace I/O here */
+                        spin_lock(&pcl->lockref.lock);
+                        while (fe->icur > 0) {
+                                if (pcl->compressed_bvecs[--fe->icur].page)
+                                        continue;
+                                pcl->compressed_bvecs[fe->icur] = *bvec;
+                                spin_unlock(&pcl->lockref.lock);
+                                return 0;
+                        }
                         spin_unlock(&pcl->lockref.lock);
-                        return 0;
                 }
-                spin_unlock(&pcl->lockref.lock);
 
                 /* otherwise, check if it can be used as a bvpage */
                 if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED &&
@@ -711,27 +713,26 @@ static int z_erofs_register_pcluster(struct z_erofs_frontend *fe)
         struct erofs_map_blocks *map = &fe->map;
         struct super_block *sb = fe->inode->i_sb;
         struct erofs_sb_info *sbi = EROFS_SB(sb);
-        bool ztailpacking = map->m_flags & EROFS_MAP_META;
         struct z_erofs_pcluster *pcl, *pre;
+        unsigned int pageofs_in;
         int err;
 
-        if (!(map->m_flags & EROFS_MAP_ENCODED) ||
-            (!ztailpacking && !erofs_blknr(sb, map->m_pa))) {
-                DBG_BUGON(1);
-                return -EFSCORRUPTED;
-        }
-
-        /* no available pcluster, let's allocate one */
-        pcl = z_erofs_alloc_pcluster(map->m_plen);
+        pageofs_in = erofs_blkoff(sb, map->m_pa);
+        pcl = z_erofs_alloc_pcluster(pageofs_in + map->m_plen);
         if (IS_ERR(pcl))
                 return PTR_ERR(pcl);
 
         lockref_init(&pcl->lockref); /* one ref for this request */
         pcl->algorithmformat = map->m_algorithmformat;
+        pcl->pclustersize = map->m_plen;
+        pcl->pageofs_in = pageofs_in;
         pcl->length = 0;
         pcl->partial = true;
         pcl->next = fe->head;
+        pcl->pos = map->m_pa;
+        pcl->pageofs_in = pageofs_in;
         pcl->pageofs_out = map->m_la & ~PAGE_MASK;
+        pcl->from_meta = map->m_flags & EROFS_MAP_META;
         fe->mode = Z_EROFS_PCLUSTER_FOLLOWED;
 
         /*
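In this hunk the pcluster allocation grows by the in-block offset so the trailing partial page is covered. A hedged sketch of that size computation, with erofs_blkoff() modeled as a power-of-two mask (an assumption for illustration; values hypothetical):

/* Illustrative only: erofs_blkoff() modeled as m_pa & (blocksize - 1). */
#include <stdio.h>

int main(void)
{
        unsigned int blksz = 4096;           /* assumed block size */
        unsigned long long m_pa = 0x1234;    /* hypothetical physical start */
        unsigned int m_plen = 7000;          /* hypothetical encoded length */
        unsigned int pageofs_in = m_pa & (blksz - 1);

        /* The allocation must cover the leading offset plus the data. */
        printf("pageofs_in=%u, alloc=%u bytes\n", pageofs_in, pageofs_in + m_plen);
        return 0;
}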
@@ -741,13 +742,10 @@ static int z_erofs_register_pcluster(struct z_erofs_frontend *fe)
         mutex_init(&pcl->lock);
         DBG_BUGON(!mutex_trylock(&pcl->lock));
 
-        if (ztailpacking) {
-                pcl->index = 0; /* which indicates ztailpacking */
-        } else {
-                pcl->index = erofs_blknr(sb, map->m_pa);
+        if (!pcl->from_meta) {
                 while (1) {
                         xa_lock(&sbi->managed_pslots);
-                        pre = __xa_cmpxchg(&sbi->managed_pslots, pcl->index,
+                        pre = __xa_cmpxchg(&sbi->managed_pslots, pcl->pos,
                                            NULL, pcl, GFP_KERNEL);
                         if (!pre || xa_is_err(pre) || z_erofs_get_pcluster(pre)) {
                                 xa_unlock(&sbi->managed_pslots);
@@ -779,7 +777,6 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
 {
         struct erofs_map_blocks *map = &fe->map;
         struct super_block *sb = fe->inode->i_sb;
-        erofs_blk_t blknr = erofs_blknr(sb, map->m_pa);
         struct z_erofs_pcluster *pcl = NULL;
         int ret;
 
@@ -790,9 +787,9 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
         if (!(map->m_flags & EROFS_MAP_META)) {
                 while (1) {
                         rcu_read_lock();
-                        pcl = xa_load(&EROFS_SB(sb)->managed_pslots, blknr);
+                        pcl = xa_load(&EROFS_SB(sb)->managed_pslots, map->m_pa);
                         if (!pcl || z_erofs_get_pcluster(pcl)) {
-                                DBG_BUGON(pcl && blknr != pcl->index);
+                                DBG_BUGON(pcl && map->m_pa != pcl->pos);
                                 rcu_read_unlock();
                                 break;
                         }
@@ -826,7 +823,7 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
 
         z_erofs_bvec_iter_begin(&fe->biter, &fe->pcl->bvset,
                                 Z_EROFS_INLINE_BVECS, fe->pcl->vcnt);
-        if (!z_erofs_is_inline_pcluster(fe->pcl)) {
+        if (!fe->pcl->from_meta) {
                 /* bind cache first when cached decompression is preferred */
                 z_erofs_bind_cache(fe);
         } else {
@@ -871,7 +868,7 @@ static bool __erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
          * It's impossible to fail after the pcluster is freezed, but in order
          * to avoid some race conditions, add a DBG_BUGON to observe this.
          */
-        DBG_BUGON(__xa_erase(&sbi->managed_pslots, pcl->index) != pcl);
+        DBG_BUGON(__xa_erase(&sbi->managed_pslots, pcl->pos) != pcl);
 
         lockref_mark_dead(&pcl->lockref);
         return true;
@@ -1221,7 +1218,7 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_backend *be, bool *overlapped)
                 }
                 be->compressed_pages[i] = page;
 
-                if (z_erofs_is_inline_pcluster(pcl) ||
+                if (pcl->from_meta ||
                     erofs_folio_is_managed(EROFS_SB(be->sb), page_folio(page))) {
                         if (!PageUptodate(page))
                                 err = -EIO;
@@ -1299,7 +1296,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
         }, be->pagepool);
 
         /* must handle all compressed pages before actual file pages */
-        if (z_erofs_is_inline_pcluster(pcl)) {
+        if (pcl->from_meta) {
                 page = pcl->compressed_bvecs[0].page;
                 WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL);
                 put_page(page);
@@ -1359,7 +1356,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
         WRITE_ONCE(pcl->next, NULL);
         mutex_unlock(&pcl->lock);
 
-        if (z_erofs_is_inline_pcluster(pcl))
+        if (pcl->from_meta)
                 z_erofs_free_pcluster(pcl);
         else
                 z_erofs_put_pcluster(sbi, pcl, try_free);
@@ -1540,7 +1537,7 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
         folio = page_folio(page);
 out_tocache:
         if (!tocache || bs != PAGE_SIZE ||
-            filemap_add_folio(mc, folio, pcl->index + nr, gfp)) {
+            filemap_add_folio(mc, folio, (pcl->pos >> PAGE_SHIFT) + nr, gfp)) {
                 /* turn into a temporary shortlived folio (1 ref) */
                 folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
                 return;
@@ -1657,19 +1654,20 @@ static void z_erofs_submit_queue(struct z_erofs_frontend *f,
 
                 pcl = next;
                 next = READ_ONCE(pcl->next);
-                if (z_erofs_is_inline_pcluster(pcl)) {
+                if (pcl->from_meta) {
                         z_erofs_move_to_bypass_queue(pcl, next, qtail);
                         continue;
                 }
 
                 /* no device id here, thus it will always succeed */
                 mdev = (struct erofs_map_dev) {
-                        .m_pa = erofs_pos(sb, pcl->index),
+                        .m_pa = round_down(pcl->pos, sb->s_blocksize),
                 };
                 (void)erofs_map_dev(sb, &mdev);
 
                 cur = mdev.m_pa;
-                end = cur + pcl->pclustersize;
+                end = round_up(cur + pcl->pageofs_in + pcl->pclustersize,
+                               sb->s_blocksize);
                 do {
                         bvec.bv_page = NULL;
                         if (bio && (cur != last_pa ||
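Block I/O still has to be issued in whole blocks, so the submission window is rounded outward: the start is rounded down to a block boundary and the end rounded up past pageofs_in + pclustersize. A small sketch of that windowing, assuming 4 KiB blocks and hypothetical values:

/* Sketch of the submit-range rounding above. */
#include <stdio.h>

#define BLKSZ 4096ULL

int main(void)
{
        unsigned long long pos = 0x3a300;   /* hypothetical pcl->pos */
        unsigned int pclustersize = 12288;  /* hypothetical pcl->pclustersize */
        unsigned int pageofs_in = pos & (BLKSZ - 1);

        unsigned long long cur = pos & ~(BLKSZ - 1);             /* round_down */
        unsigned long long end = (cur + pageofs_in + pclustersize + BLKSZ - 1)
                                 & ~(BLKSZ - 1);                 /* round_up */

        printf("read blocks [%llu, %llu): %llu block(s)\n",
               cur / BLKSZ, end / BLKSZ, (end - cur) / BLKSZ);
        return 0;
}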
