Skip to content

Commit 69a3a0a

Browse files
committed
Merge tag 'erofs-for-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs
Pull erofs updates from Gao Xiang: "In this cycle, we add file-backed mount support, which has been a strong requirement for years. It is especially useful when there are thousands of images running on the same host for containers and other sandbox use cases, unlike OS image use cases. Without file-backed mounts, it's hard for container runtimes to manage and isolate so many unnecessary virtual block devices safely and efficiently; therefore, file-backed mounts are highly preferred. For EROFS users, ComposeFS [1], containerd, and Android APEXes [2] will directly benefit from it, and I've seen no risk in implementing it as a completely immutable filesystem. The previous experimental feature "EROFS over fscache" is now marked as deprecated because: - Fscache is no longer an independent subsystem and has been merged into netfs, which was somewhat unexpected when it was proposed. - New HSM "fanotify pre-content hooks" [3] will be landed upstream. These hooks will replace "EROFS over fscache" in a simpler way, as EROFS won't be bothered with kernel caching anymore. Userspace programs can also manage their own caching hierarchy more flexibly. Once the HSM "fanotify pre-content hooks" are landed, I will remove the fscache backend entirely as an internal dependency cleanup. More background is given in the original patchset [4]. In addition to that, there are bugfixes and cleanups as usual. 
Summary: - Support file-backed mounts for containers and sandboxes - Mark the experimental fscache backend as deprecated - Handle overlapped pclusters caused by crafted images properly - Fix a failure path which could cause infinite loops in z_erofs_init_decompressor() - Get rid of unnecessary NOFAILs - Harmless on-disk hardening & minor cleanups" * tag 'erofs-for-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs: erofs: reject inodes with negative i_size erofs: restrict pcluster size limitations erofs: allocate more short-lived pages from reserved pool first erofs: sunset unneeded NOFAILs erofs: simplify erofs_map_blocks_flatmode() erofs: refactor read_inode calling convention erofs: use kmemdup_nul in erofs_fill_symlink erofs: mark experimental fscache backend deprecated erofs: support compressed inodes for fileio erofs: support unencoded inodes for fileio erofs: add file-backed mount support erofs: handle overlapped pclusters out of crafted images properly erofs: fix error handling in z_erofs_init_decompressor erofs: clean up erofs_register_sysfs() erofs: fix incorrect symlink detection in fast symlink
2 parents 7a40974 + 025497e commit 69a3a0a

File tree

12 files changed

+544
-299
lines changed

12 files changed

+544
-299
lines changed

fs/erofs/Kconfig

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,23 @@ config EROFS_FS_SECURITY
7474

7575
If you are not using a security module, say N.
7676

77+
config EROFS_FS_BACKED_BY_FILE
78+
bool "File-backed EROFS filesystem support"
79+
depends on EROFS_FS
80+
default y
81+
help
82+
This allows EROFS to use filesystem image files directly, without
83+
the intercession of loopback block devices or likewise. It is
84+
particularly useful for container images with numerous blobs and
85+
other sandboxes, where loop devices behave intricately. It can also
86+
be used to simplify error-prone lifetime management of unnecessary
87+
virtual block devices.
88+
89+
Note that this feature, along with ongoing fanotify pre-content
90+
hooks, will eventually replace "EROFS over fscache."
91+
92+
If you don't want to enable this feature, say N.
93+
7794
config EROFS_FS_ZIP
7895
bool "EROFS Data Compression Support"
7996
depends on EROFS_FS
@@ -128,7 +145,7 @@ config EROFS_FS_ZIP_ZSTD
128145
If unsure, say N.
129146

130147
config EROFS_FS_ONDEMAND
131-
bool "EROFS fscache-based on-demand read support"
148+
bool "EROFS fscache-based on-demand read support (deprecated)"
132149
depends on EROFS_FS
133150
select NETFS_SUPPORT
134151
select FSCACHE
@@ -138,6 +155,9 @@ config EROFS_FS_ONDEMAND
138155
This permits EROFS to use fscache-backed data blobs with on-demand
139156
read support.
140157

158+
It is now deprecated and scheduled to be removed from the kernel
159+
after fanotify pre-content hooks are landed.
160+
141161
If unsure, say N.
142162

143163
config EROFS_FS_PCPU_KTHREAD

fs/erofs/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,5 @@ erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o zutil.o
77
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
88
erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o
99
erofs-$(CONFIG_EROFS_FS_ZIP_ZSTD) += decompressor_zstd.o
10+
erofs-$(CONFIG_EROFS_FS_BACKED_BY_FILE) += fileio.o
1011
erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o

fs/erofs/data.c

Lines changed: 74 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,12 @@ void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset,
5959

6060
void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
6161
{
62-
if (erofs_is_fscache_mode(sb))
63-
buf->mapping = EROFS_SB(sb)->s_fscache->inode->i_mapping;
62+
struct erofs_sb_info *sbi = EROFS_SB(sb);
63+
64+
if (erofs_is_fileio_mode(sbi))
65+
buf->mapping = file_inode(sbi->fdev)->i_mapping;
66+
else if (erofs_is_fscache_mode(sb))
67+
buf->mapping = sbi->s_fscache->inode->i_mapping;
6468
else
6569
buf->mapping = sb->s_bdev->bd_mapping;
6670
}
@@ -75,38 +79,28 @@ void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
7579
static int erofs_map_blocks_flatmode(struct inode *inode,
7680
struct erofs_map_blocks *map)
7781
{
78-
erofs_blk_t nblocks, lastblk;
79-
u64 offset = map->m_la;
8082
struct erofs_inode *vi = EROFS_I(inode);
8183
struct super_block *sb = inode->i_sb;
8284
bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);
85+
erofs_blk_t lastblk = erofs_iblks(inode) - tailendpacking;
8386

84-
nblocks = erofs_iblks(inode);
85-
lastblk = nblocks - tailendpacking;
86-
87-
/* there is no hole in flatmode */
88-
map->m_flags = EROFS_MAP_MAPPED;
89-
if (offset < erofs_pos(sb, lastblk)) {
87+
map->m_flags = EROFS_MAP_MAPPED; /* no hole in flat inodes */
88+
if (map->m_la < erofs_pos(sb, lastblk)) {
9089
map->m_pa = erofs_pos(sb, vi->raw_blkaddr) + map->m_la;
91-
map->m_plen = erofs_pos(sb, lastblk) - offset;
92-
} else if (tailendpacking) {
90+
map->m_plen = erofs_pos(sb, lastblk) - map->m_la;
91+
} else {
92+
DBG_BUGON(!tailendpacking);
9393
map->m_pa = erofs_iloc(inode) + vi->inode_isize +
94-
vi->xattr_isize + erofs_blkoff(sb, offset);
95-
map->m_plen = inode->i_size - offset;
94+
vi->xattr_isize + erofs_blkoff(sb, map->m_la);
95+
map->m_plen = inode->i_size - map->m_la;
9696

9797
/* inline data should be located in the same meta block */
9898
if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) {
99-
erofs_err(sb, "inline data cross block boundary @ nid %llu",
100-
vi->nid);
99+
erofs_err(sb, "inline data across blocks @ nid %llu", vi->nid);
101100
DBG_BUGON(1);
102101
return -EFSCORRUPTED;
103102
}
104103
map->m_flags |= EROFS_MAP_META;
105-
} else {
106-
erofs_err(sb, "internal error @ nid: %llu (size %llu), m_la 0x%llx",
107-
vi->nid, inode->i_size, map->m_la);
108-
DBG_BUGON(1);
109-
return -EIO;
110104
}
111105
return 0;
112106
}
@@ -128,7 +122,7 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
128122
if (map->m_la >= inode->i_size) {
129123
/* leave out-of-bound access unmapped */
130124
map->m_flags = 0;
131-
map->m_plen = 0;
125+
map->m_plen = map->m_llen;
132126
goto out;
133127
}
134128

@@ -189,16 +183,34 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
189183
return err;
190184
}
191185

186+
static void erofs_fill_from_devinfo(struct erofs_map_dev *map,
187+
struct erofs_device_info *dif)
188+
{
189+
map->m_bdev = NULL;
190+
map->m_fp = NULL;
191+
if (dif->file) {
192+
if (S_ISBLK(file_inode(dif->file)->i_mode))
193+
map->m_bdev = file_bdev(dif->file);
194+
else
195+
map->m_fp = dif->file;
196+
}
197+
map->m_daxdev = dif->dax_dev;
198+
map->m_dax_part_off = dif->dax_part_off;
199+
map->m_fscache = dif->fscache;
200+
}
201+
192202
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
193203
{
194204
struct erofs_dev_context *devs = EROFS_SB(sb)->devs;
195205
struct erofs_device_info *dif;
206+
erofs_off_t startoff, length;
196207
int id;
197208

198209
map->m_bdev = sb->s_bdev;
199210
map->m_daxdev = EROFS_SB(sb)->dax_dev;
200211
map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
201212
map->m_fscache = EROFS_SB(sb)->s_fscache;
213+
map->m_fp = EROFS_SB(sb)->fdev;
202214

203215
if (map->m_deviceid) {
204216
down_read(&devs->rwsem);
@@ -212,29 +224,20 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
212224
up_read(&devs->rwsem);
213225
return 0;
214226
}
215-
map->m_bdev = dif->bdev_file ? file_bdev(dif->bdev_file) : NULL;
216-
map->m_daxdev = dif->dax_dev;
217-
map->m_dax_part_off = dif->dax_part_off;
218-
map->m_fscache = dif->fscache;
227+
erofs_fill_from_devinfo(map, dif);
219228
up_read(&devs->rwsem);
220229
} else if (devs->extra_devices && !devs->flatdev) {
221230
down_read(&devs->rwsem);
222231
idr_for_each_entry(&devs->tree, dif, id) {
223-
erofs_off_t startoff, length;
224-
225232
if (!dif->mapped_blkaddr)
226233
continue;
234+
227235
startoff = erofs_pos(sb, dif->mapped_blkaddr);
228236
length = erofs_pos(sb, dif->blocks);
229-
230237
if (map->m_pa >= startoff &&
231238
map->m_pa < startoff + length) {
232239
map->m_pa -= startoff;
233-
map->m_bdev = dif->bdev_file ?
234-
file_bdev(dif->bdev_file) : NULL;
235-
map->m_daxdev = dif->dax_dev;
236-
map->m_dax_part_off = dif->dax_part_off;
237-
map->m_fscache = dif->fscache;
240+
erofs_fill_from_devinfo(map, dif);
238241
break;
239242
}
240243
}
@@ -243,6 +246,42 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
243246
return 0;
244247
}
245248

249+
/*
250+
* bit 30: I/O error occurred on this folio
251+
* bit 0 - 29: remaining parts to complete this folio
252+
*/
253+
#define EROFS_ONLINEFOLIO_EIO (1 << 30)
254+
255+
void erofs_onlinefolio_init(struct folio *folio)
256+
{
257+
union {
258+
atomic_t o;
259+
void *v;
260+
} u = { .o = ATOMIC_INIT(1) };
261+
262+
folio->private = u.v; /* valid only if file-backed folio is locked */
263+
}
264+
265+
void erofs_onlinefolio_split(struct folio *folio)
266+
{
267+
atomic_inc((atomic_t *)&folio->private);
268+
}
269+
270+
void erofs_onlinefolio_end(struct folio *folio, int err)
271+
{
272+
int orig, v;
273+
274+
do {
275+
orig = atomic_read((atomic_t *)&folio->private);
276+
v = (orig - 1) | (err ? EROFS_ONLINEFOLIO_EIO : 0);
277+
} while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig);
278+
279+
if (v & ~EROFS_ONLINEFOLIO_EIO)
280+
return;
281+
folio->private = 0;
282+
folio_end_read(folio, !(v & EROFS_ONLINEFOLIO_EIO));
283+
}
284+
246285
static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
247286
unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
248287
{
@@ -392,7 +431,7 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
392431
}
393432

394433
/* for uncompressed (aligned) files and raw access for other files */
395-
const struct address_space_operations erofs_raw_access_aops = {
434+
const struct address_space_operations erofs_aops = {
396435
.read_folio = erofs_read_folio,
397436
.readahead = erofs_readahead,
398437
.bmap = erofs_bmap,

fs/erofs/decompressor.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,7 @@ int __init z_erofs_init_decompressor(void)
539539
for (i = 0; i < Z_EROFS_COMPRESSION_MAX; ++i) {
540540
err = z_erofs_decomp[i] ? z_erofs_decomp[i]->init() : 0;
541541
if (err) {
542-
while (--i)
542+
while (i--)
543543
if (z_erofs_decomp[i])
544544
z_erofs_decomp[i]->exit();
545545
return err;

fs/erofs/erofs_fs.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,9 +288,12 @@ struct erofs_dirent {
288288

289289
#define EROFS_NAME_LEN 255
290290

291-
/* maximum supported size of a physical compression cluster */
291+
/* maximum supported encoded size of a physical compressed cluster */
292292
#define Z_EROFS_PCLUSTER_MAX_SIZE (1024 * 1024)
293293

294+
/* maximum supported decoded size of a physical compressed cluster */
295+
#define Z_EROFS_PCLUSTER_MAX_DSIZE (12 * 1024 * 1024)
296+
294297
/* available compression algorithm types (for h_algorithmtype) */
295298
enum {
296299
Z_EROFS_COMPRESSION_LZ4 = 0,

0 commit comments

Comments
 (0)