@@ -44,8 +44,8 @@ struct z_erofs_pcluster {
 	/* A: point to next chained pcluster or TAILs */
 	struct z_erofs_pcluster *next;
 
-	/* I: start block address of this pcluster */
-	erofs_off_t index;
+	/* I: start physical position of this pcluster */
+	erofs_off_t pos;
 
 	/* L: the maximum decompression size of this round */
 	unsigned int length;
@@ -73,6 +73,9 @@ struct z_erofs_pcluster {
 	/* I: compression algorithm format */
 	unsigned char algorithmformat;
 
+	/* I: whether compressed data is in-lined or not */
+	bool from_meta;
+
 	/* L: whether partial decompression or not */
 	bool partial;
 
@@ -102,14 +105,9 @@ struct z_erofs_decompressqueue {
 	bool eio, sync;
 };
 
-static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl)
-{
-	return !pcl->index;
-}
-
 static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
 {
-	return PAGE_ALIGN(pcl->pclustersize) >> PAGE_SHIFT;
+	return PAGE_ALIGN(pcl->pageofs_in + pcl->pclustersize) >> PAGE_SHIFT;
 }
 
 static bool erofs_folio_is_managed(struct erofs_sb_info *sbi, struct folio *fo)
@@ -133,7 +131,7 @@ struct z_erofs_pcluster_slab {
 
 static struct z_erofs_pcluster_slab pcluster_pool[] __read_mostly = {
 	_PCLP(1), _PCLP(4), _PCLP(16), _PCLP(64), _PCLP(128),
-	_PCLP(Z_EROFS_PCLUSTER_MAX_PAGES)
+	_PCLP(Z_EROFS_PCLUSTER_MAX_PAGES + 1)
 };
 
 struct z_erofs_bvec_iter {
@@ -267,7 +265,6 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int size)
 		pcl = kmem_cache_zalloc(pcs->slab, GFP_KERNEL);
 		if (!pcl)
 			return ERR_PTR(-ENOMEM);
-		pcl->pclustersize = size;
 		return pcl;
 	}
 	return ERR_PTR(-EINVAL);
@@ -516,6 +513,7 @@ static void z_erofs_bind_cache(struct z_erofs_frontend *fe)
 	struct z_erofs_pcluster *pcl = fe->pcl;
 	unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
 	bool shouldalloc = z_erofs_should_alloc_cache(fe);
+	pgoff_t poff = pcl->pos >> PAGE_SHIFT;
 	bool may_bypass = true;
 	/* Optimistic allocation, as in-place I/O can be used as a fallback */
 	gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
@@ -532,7 +530,7 @@ static void z_erofs_bind_cache(struct z_erofs_frontend *fe)
 		if (READ_ONCE(pcl->compressed_bvecs[i].page))
 			continue;
 
-		folio = filemap_get_folio(mc, pcl->index + i);
+		folio = filemap_get_folio(mc, poff + i);
 		if (IS_ERR(folio)) {
 			may_bypass = false;
 			if (!shouldalloc)
@@ -575,7 +573,7 @@ static int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
 	struct folio *folio;
 	int i;
 
-	DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
+	DBG_BUGON(pcl->from_meta);
 	/* Each cached folio contains one page unless bs > ps is supported */
 	for (i = 0; i < pclusterpages; ++i) {
 		if (pcl->compressed_bvecs[i].page) {
@@ -607,7 +605,7 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
 	ret = false;
 	spin_lock(&pcl->lockref.lock);
 	if (pcl->lockref.count <= 0) {
-		DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
+		DBG_BUGON(pcl->from_meta);
 		for (; bvec < end; ++bvec) {
 			if (bvec->page && page_folio(bvec->page) == folio) {
 				bvec->page = NULL;
@@ -667,16 +665,20 @@ static int z_erofs_attach_page(struct z_erofs_frontend *fe,
 	int ret;
 
 	if (exclusive) {
-		/* give priority for inplaceio to use file pages first */
-		spin_lock(&pcl->lockref.lock);
-		while (fe->icur > 0) {
-			if (pcl->compressed_bvecs[--fe->icur].page)
-				continue;
-			pcl->compressed_bvecs[fe->icur] = *bvec;
+		/* Inplace I/O is limited to one page for uncompressed data */
+		if (pcl->algorithmformat < Z_EROFS_COMPRESSION_MAX ||
+		    fe->icur <= 1) {
+			/* Try to prioritize inplace I/O here */
+			spin_lock(&pcl->lockref.lock);
+			while (fe->icur > 0) {
+				if (pcl->compressed_bvecs[--fe->icur].page)
+					continue;
+				pcl->compressed_bvecs[fe->icur] = *bvec;
+				spin_unlock(&pcl->lockref.lock);
+				return 0;
+			}
 			spin_unlock(&pcl->lockref.lock);
-			return 0;
 		}
-		spin_unlock(&pcl->lockref.lock);
 
 		/* otherwise, check if it can be used as a bvpage */
 		if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED &&
@@ -711,27 +713,26 @@ static int z_erofs_register_pcluster(struct z_erofs_frontend *fe)
 	struct erofs_map_blocks *map = &fe->map;
 	struct super_block *sb = fe->inode->i_sb;
 	struct erofs_sb_info *sbi = EROFS_SB(sb);
-	bool ztailpacking = map->m_flags & EROFS_MAP_META;
 	struct z_erofs_pcluster *pcl, *pre;
+	unsigned int pageofs_in;
 	int err;
 
-	if (!(map->m_flags & EROFS_MAP_ENCODED) ||
-	    (!ztailpacking && !erofs_blknr(sb, map->m_pa))) {
-		DBG_BUGON(1);
-		return -EFSCORRUPTED;
-	}
-
-	/* no available pcluster, let's allocate one */
-	pcl = z_erofs_alloc_pcluster(map->m_plen);
+	pageofs_in = erofs_blkoff(sb, map->m_pa);
+	pcl = z_erofs_alloc_pcluster(pageofs_in + map->m_plen);
 	if (IS_ERR(pcl))
 		return PTR_ERR(pcl);
 
 	lockref_init(&pcl->lockref); /* one ref for this request */
 	pcl->algorithmformat = map->m_algorithmformat;
+	pcl->pclustersize = map->m_plen;
+	pcl->pageofs_in = pageofs_in;
 	pcl->length = 0;
 	pcl->partial = true;
 	pcl->next = fe->head;
+	pcl->pos = map->m_pa;
+	pcl->pageofs_in = pageofs_in;
 	pcl->pageofs_out = map->m_la & ~PAGE_MASK;
+	pcl->from_meta = map->m_flags & EROFS_MAP_META;
 	fe->mode = Z_EROFS_PCLUSTER_FOLLOWED;
 
 	/*
@@ -741,13 +742,10 @@ static int z_erofs_register_pcluster(struct z_erofs_frontend *fe)
 	mutex_init(&pcl->lock);
 	DBG_BUGON(!mutex_trylock(&pcl->lock));
 
-	if (ztailpacking) {
-		pcl->index = 0;		/* which indicates ztailpacking */
-	} else {
-		pcl->index = erofs_blknr(sb, map->m_pa);
+	if (!pcl->from_meta) {
 		while (1) {
 			xa_lock(&sbi->managed_pslots);
-			pre = __xa_cmpxchg(&sbi->managed_pslots, pcl->index,
+			pre = __xa_cmpxchg(&sbi->managed_pslots, pcl->pos,
 					   NULL, pcl, GFP_KERNEL);
 			if (!pre || xa_is_err(pre) || z_erofs_get_pcluster(pre)) {
 				xa_unlock(&sbi->managed_pslots);
@@ -779,7 +777,6 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
 {
 	struct erofs_map_blocks *map = &fe->map;
 	struct super_block *sb = fe->inode->i_sb;
-	erofs_blk_t blknr = erofs_blknr(sb, map->m_pa);
 	struct z_erofs_pcluster *pcl = NULL;
 	int ret;
 
@@ -790,9 +787,9 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
 	if (!(map->m_flags & EROFS_MAP_META)) {
 		while (1) {
 			rcu_read_lock();
-			pcl = xa_load(&EROFS_SB(sb)->managed_pslots, blknr);
+			pcl = xa_load(&EROFS_SB(sb)->managed_pslots, map->m_pa);
 			if (!pcl || z_erofs_get_pcluster(pcl)) {
-				DBG_BUGON(pcl && blknr != pcl->index);
+				DBG_BUGON(pcl && map->m_pa != pcl->pos);
 				rcu_read_unlock();
 				break;
 			}
@@ -826,7 +823,7 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
 
 	z_erofs_bvec_iter_begin(&fe->biter, &fe->pcl->bvset,
 				Z_EROFS_INLINE_BVECS, fe->pcl->vcnt);
-	if (!z_erofs_is_inline_pcluster(fe->pcl)) {
+	if (!fe->pcl->from_meta) {
 		/* bind cache first when cached decompression is preferred */
 		z_erofs_bind_cache(fe);
 	} else {
@@ -871,7 +868,7 @@ static bool __erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
 	 * It's impossible to fail after the pcluster is freezed, but in order
 	 * to avoid some race conditions, add a DBG_BUGON to observe this.
 	 */
-	DBG_BUGON(__xa_erase(&sbi->managed_pslots, pcl->index) != pcl);
+	DBG_BUGON(__xa_erase(&sbi->managed_pslots, pcl->pos) != pcl);
 
 	lockref_mark_dead(&pcl->lockref);
 	return true;
@@ -1221,7 +1218,7 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_backend *be, bool *overlapped)
 		}
 		be->compressed_pages[i] = page;
 
-		if (z_erofs_is_inline_pcluster(pcl) ||
+		if (pcl->from_meta ||
 		    erofs_folio_is_managed(EROFS_SB(be->sb), page_folio(page))) {
 			if (!PageUptodate(page))
 				err = -EIO;
@@ -1299,7 +1296,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
 	}, be->pagepool);
 
 	/* must handle all compressed pages before actual file pages */
-	if (z_erofs_is_inline_pcluster(pcl)) {
+	if (pcl->from_meta) {
 		page = pcl->compressed_bvecs[0].page;
 		WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL);
 		put_page(page);
@@ -1359,7 +1356,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
 	WRITE_ONCE(pcl->next, NULL);
 	mutex_unlock(&pcl->lock);
 
-	if (z_erofs_is_inline_pcluster(pcl))
+	if (pcl->from_meta)
 		z_erofs_free_pcluster(pcl);
 	else
 		z_erofs_put_pcluster(sbi, pcl, try_free);
@@ -1540,7 +1537,7 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
 	folio = page_folio(page);
out_tocache:
 	if (!tocache || bs != PAGE_SIZE ||
-	    filemap_add_folio(mc, folio, pcl->index + nr, gfp)) {
+	    filemap_add_folio(mc, folio, (pcl->pos >> PAGE_SHIFT) + nr, gfp)) {
 		/* turn into a temporary shortlived folio (1 ref) */
 		folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
 		return;
@@ -1657,19 +1654,20 @@ static void z_erofs_submit_queue(struct z_erofs_frontend *f,
 
 		pcl = next;
 		next = READ_ONCE(pcl->next);
-		if (z_erofs_is_inline_pcluster(pcl)) {
+		if (pcl->from_meta) {
 			z_erofs_move_to_bypass_queue(pcl, next, qtail);
 			continue;
 		}
 
 		/* no device id here, thus it will always succeed */
 		mdev = (struct erofs_map_dev) {
-			.m_pa = erofs_pos(sb, pcl->index),
+			.m_pa = round_down(pcl->pos, sb->s_blocksize),
 		};
 		(void)erofs_map_dev(sb, &mdev);
 
 		cur = mdev.m_pa;
-		end = cur + pcl->pclustersize;
+		end = round_up(cur + pcl->pageofs_in + pcl->pclustersize,
+			       sb->s_blocksize);
 		do {
 			bvec.bv_page = NULL;
 			if (bio && (cur != last_pa ||