Skip to content

Commit dd2c019

Browse files
committed
Merge tag 'erofs-for-6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs
Pull erofs updates from Gao Xiang:

 "In this cycle, an xattr bloom filter feature is introduced to speed up negative xattr lookups, which was originally suggested by Alexander for Composefs use cases.

  Additionally, the DEFLATE algorithm is now supported, which can be used together with hardware accelerators for our cloud workloads. Each supported compression algorithm can be selected on a per-file basis for specific access patterns too.

  There are also some random fixes and cleanups as usual:

   - Support xattr bloom filter to optimize negative xattr lookups
   - Support DEFLATE compression algorithm as an alternative
   - Fix a regression that ztailpacking pclusters don't release properly
   - Stop warning about the dedupe and fragments features
   - Some folio conversions and cleanups"

* tag 'erofs-for-6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: release ztailpacking pclusters properly
  erofs: don't warn dedupe and fragments features anymore
  erofs: adapt folios for z_erofs_read_folio()
  erofs: adapt folios for z_erofs_readahead()
  erofs: get rid of fe->backmost for cache decompression
  erofs: drop z_erofs_page_mark_eio()
  erofs: tidy up z_erofs_do_read_page()
  erofs: move preparation logic into z_erofs_pcluster_begin()
  erofs: avoid obsolete {collector,collection} terms
  erofs: simplify z_erofs_read_fragment()
  erofs: remove redundant erofs_fs_type declaration in super.c
  erofs: add necessary kmem_cache_create flags for erofs inode cache
  erofs: clean up redundant comment and adjust code alignment
  erofs: refine warning messages for zdata I/Os
  erofs: boost negative xattr lookup with bloom filter
  erofs: update on-disk format for xattr name filter
  erofs: DEFLATE compression support
2 parents f20ae9c + 91b1ad0 commit dd2c019

File tree

12 files changed

+467
-198
lines changed

12 files changed

+467
-198
lines changed

fs/erofs/Kconfig

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ config EROFS_FS_DEBUG
3838
config EROFS_FS_XATTR
3939
bool "EROFS extended attributes"
4040
depends on EROFS_FS
41+
select XXHASH
4142
default y
4243
help
4344
Extended attributes are name:value pairs associated with inodes by
@@ -99,6 +100,21 @@ config EROFS_FS_ZIP_LZMA
99100

100101
If unsure, say N.
101102

103+
# Enables the DEFLATE decoder; fs/erofs/Makefile builds decompressor_deflate.o
# when this symbol is set.
config EROFS_FS_ZIP_DEFLATE
104+
bool "EROFS DEFLATE compressed data support"
105+
depends on EROFS_FS_ZIP
106+
# the decoder calls zlib_inflateInit2()/zlib_inflate()/zlib_inflateEnd()
select ZLIB_INFLATE
107+
help
108+
Saying Y here includes support for reading EROFS file systems
109+
containing DEFLATE compressed data. It gives better compression
110+
ratios than the default LZ4 format, while it costs more CPU
111+
overhead.
112+
113+
DEFLATE support is an experimental feature for now and so most
114+
file systems will be readable without selecting this option.
115+
116+
If unsure, say N.
117+
102118
config EROFS_FS_ONDEMAND
103119
bool "EROFS fscache-based on-demand read support"
104120
depends on CACHEFILES_ONDEMAND && (EROFS_FS=m && FSCACHE || EROFS_FS=y && FSCACHE=y)

fs/erofs/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@ erofs-objs := super.o inode.o data.o namei.o dir.o utils.o sysfs.o
55
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
66
erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o pcpubuf.o
77
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
8+
erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o
89
erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o

fs/erofs/compress.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,4 +94,6 @@ extern const struct z_erofs_decompressor erofs_decompressors[];
9494
/* prototypes for specific algorithms */
9595
int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
9696
struct page **pagepool);
97+
int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
98+
struct page **pagepool);
9799
#endif

fs/erofs/decompressor.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,4 +379,10 @@ const struct z_erofs_decompressor erofs_decompressors[] = {
379379
.name = "lzma"
380380
},
381381
#endif
382+
#ifdef CONFIG_EROFS_FS_ZIP_DEFLATE
383+
[Z_EROFS_COMPRESSION_DEFLATE] = {
384+
.decompress = z_erofs_deflate_decompress,
385+
.name = "deflate"
386+
},
387+
#endif
382388
};

fs/erofs/decompressor_deflate.c

Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
// SPDX-License-Identifier: GPL-2.0-or-later
2+
#include <linux/module.h>
3+
#include <linux/zlib.h>
4+
#include "compress.h"
5+
6+
/*
 * One pooled inflate context. Idle contexts are chained through ->next on
 * the global z_erofs_deflate_head list.
 */
struct z_erofs_deflate {
7+
/* next idle context on the singly linked list */
struct z_erofs_deflate *next;
8+
/* zlib stream state; z.workspace is vmalloc()ed by z_erofs_deflate_init() */
struct z_stream_s z;
9+
/*
 * One-page private copy of the compressed input, used by the decompressor
 * when the current input page is also the current output page.
 */
u8 bounce[PAGE_SIZE];
10+
};
11+
12+
/* taken around every update of z_erofs_deflate_head */
static DEFINE_SPINLOCK(z_erofs_deflate_lock);
13+
/*
 * z_erofs_deflate_nstrms: requested number of contexts; when 0,
 * z_erofs_deflate_init() substitutes num_possible_cpus().
 * z_erofs_deflate_avail_strms: number of contexts allocated so far; it is
 * bumped per allocation in init and dropped per free in exit.
 */
static unsigned int z_erofs_deflate_nstrms, z_erofs_deflate_avail_strms;
14+
/* head of the list of idle contexts */
static struct z_erofs_deflate *z_erofs_deflate_head;
15+
/* decompressors sleep here while the idle list is empty */
static DECLARE_WAIT_QUEUE_HEAD(z_erofs_deflate_wq);
16+
17+
/* exposes z_erofs_deflate_nstrms as module parameter "deflate_streams", mode 0444 */
module_param_named(deflate_streams, z_erofs_deflate_nstrms, uint, 0444);
18+
19+
void z_erofs_deflate_exit(void)
20+
{
21+
/* there should be no running fs instance */
22+
while (z_erofs_deflate_avail_strms) {
23+
struct z_erofs_deflate *strm;
24+
25+
spin_lock(&z_erofs_deflate_lock);
26+
strm = z_erofs_deflate_head;
27+
if (!strm) {
28+
spin_unlock(&z_erofs_deflate_lock);
29+
continue;
30+
}
31+
z_erofs_deflate_head = NULL;
32+
spin_unlock(&z_erofs_deflate_lock);
33+
34+
while (strm) {
35+
struct z_erofs_deflate *n = strm->next;
36+
37+
vfree(strm->z.workspace);
38+
kfree(strm);
39+
--z_erofs_deflate_avail_strms;
40+
strm = n;
41+
}
42+
}
43+
}
44+
45+
/*
 * Pre-allocate the pool of DEFLATE contexts.
 *
 * The pool size is z_erofs_deflate_nstrms, or the number of possible CPUs
 * when that is left at zero. Each context gets a zlib inflate workspace.
 *
 * Return: 0 on success, -ENOMEM if any allocation fails (everything that
 * was allocated so far is released through z_erofs_deflate_exit()).
 */
int __init z_erofs_deflate_init(void)
{
	struct z_erofs_deflate *ctx;

	/* no explicit stream count: fall back to one per possible CPU */
	if (z_erofs_deflate_nstrms == 0)
		z_erofs_deflate_nstrms = num_possible_cpus();

	while (z_erofs_deflate_avail_strms < z_erofs_deflate_nstrms) {
		ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
		if (!ctx)
			goto err_nomem;

		/* XXX: in-kernel zlib cannot shrink windowbits currently */
		ctx->z.workspace = vmalloc(zlib_inflate_workspacesize());
		if (!ctx->z.workspace) {
			kfree(ctx);
			goto err_nomem;
		}

		/* publish the new context at the head of the idle list */
		spin_lock(&z_erofs_deflate_lock);
		ctx->next = z_erofs_deflate_head;
		z_erofs_deflate_head = ctx;
		spin_unlock(&z_erofs_deflate_lock);

		/* counted only once it is on the list, so exit() can find it */
		z_erofs_deflate_avail_strms++;
	}
	return 0;

err_nomem:
	pr_err("failed to allocate zlib workspace\n");
	z_erofs_deflate_exit();
	return -ENOMEM;
}
78+
79+
int z_erofs_load_deflate_config(struct super_block *sb,
80+
struct erofs_super_block *dsb,
81+
struct z_erofs_deflate_cfgs *dfl, int size)
82+
{
83+
if (!dfl || size < sizeof(struct z_erofs_deflate_cfgs)) {
84+
erofs_err(sb, "invalid deflate cfgs, size=%u", size);
85+
return -EINVAL;
86+
}
87+
88+
if (dfl->windowbits > MAX_WBITS) {
89+
erofs_err(sb, "unsupported windowbits %u", dfl->windowbits);
90+
return -EOPNOTSUPP;
91+
}
92+
93+
erofs_info(sb, "EXPERIMENTAL DEFLATE feature in use. Use at your own risk!");
94+
return 0;
95+
}
96+
97+
/*
 * Inflate the DEFLATE data of @rq from rq->in[] into rq->out[].
 *
 * @rq:       decompression request. rq->out[] slots that are NULL are filled
 *            with pages taken from @pagepool, and rq->in[] entries that alias
 *            the current output page may be replaced by private copies.
 * @pagepool: pool handed to erofs_allocpage() for those temporary pages.
 *
 * Sleeps in wait_event() until a stream context is available on the global
 * idle list; the context is pushed back before returning.
 *
 * Return: 0 on success; the error reported by z_erofs_fixup_insize(); -EIO
 * if zlib_inflateInit2() fails, or if zlib_inflateEnd() fails and no earlier
 * error was recorded; -EFSCORRUPTED if the output or input pages run out or
 * zlib_inflate() stops in an unexpected state.
 */
int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
98+
struct page **pagepool)
99+
{
100+
/* number of output pages spanned by [pageofs_out, pageofs_out + outputsize) */
const unsigned int nrpages_out =
101+
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
102+
const unsigned int nrpages_in =
103+
PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
104+
struct super_block *sb = rq->sb;
105+
/* insz/outsz: bytes not yet handed to zlib; pofs: offset into the next output page */
unsigned int insz, outsz, pofs;
106+
struct z_erofs_deflate *strm;
107+
u8 *kin, *kout = NULL;
108+
/* true once the current input page has been copied into strm->bounce */
bool bounced = false;
109+
/* no/ni: indices of the current output/input page; no starts at -1 so the first ++no yields 0 */
int no = -1, ni = 0, j = 0, zerr, err;
110+
111+
/* 1. get the exact DEFLATE compressed size */
112+
kin = kmap_local_page(*rq->in);
113+
err = z_erofs_fixup_insize(rq, kin + rq->pageofs_in,
114+
min_t(unsigned int, rq->inputsize,
115+
sb->s_blocksize - rq->pageofs_in));
116+
if (err) {
117+
kunmap_local(kin);
118+
return err;
119+
}
120+
121+
/* 2. get an available DEFLATE context */
122+
again:
123+
spin_lock(&z_erofs_deflate_lock);
124+
strm = z_erofs_deflate_head;
125+
if (!strm) {
126+
spin_unlock(&z_erofs_deflate_lock);
127+
/* the head is re-checked under the lock after waking up */
wait_event(z_erofs_deflate_wq, READ_ONCE(z_erofs_deflate_head));
128+
goto again;
129+
}
130+
z_erofs_deflate_head = strm->next;
131+
spin_unlock(&z_erofs_deflate_lock);
132+
133+
/* 3. multi-call decompress */
134+
insz = rq->inputsize;
135+
outsz = rq->outputsize;
136+
/* NOTE(review): negative window bits presumably select raw DEFLATE; confirm against zlib's inflateInit2 docs */
zerr = zlib_inflateInit2(&strm->z, -MAX_WBITS);
137+
if (zerr != Z_OK) {
138+
err = -EIO;
139+
goto failed_zinit;
140+
}
141+
142+
pofs = rq->pageofs_out;
143+
/* the first input chunk is whatever remains of the first page after pageofs_in */
strm->z.avail_in = min_t(u32, insz, PAGE_SIZE - rq->pageofs_in);
144+
insz -= strm->z.avail_in;
145+
strm->z.next_in = kin + rq->pageofs_in;
146+
/* forces the first loop iteration to set up output page 0 */
strm->z.avail_out = 0;
147+
148+
while (1) {
149+
/* current output page is full (or none mapped yet): advance to the next one */
if (!strm->z.avail_out) {
150+
if (++no >= nrpages_out || !outsz) {
151+
erofs_err(sb, "insufficient space for decompressed data");
152+
err = -EFSCORRUPTED;
153+
break;
154+
}
155+
156+
if (kout)
157+
kunmap_local(kout);
158+
strm->z.avail_out = min_t(u32, outsz, PAGE_SIZE - pofs);
159+
outsz -= strm->z.avail_out;
160+
/* no page supplied for this slot: use a temporary page tagged as short-lived */
if (!rq->out[no]) {
161+
rq->out[no] = erofs_allocpage(pagepool,
162+
GFP_KERNEL | __GFP_NOFAIL);
163+
set_page_private(rq->out[no],
164+
Z_EROFS_SHORTLIVED_PAGE);
165+
}
166+
kout = kmap_local_page(rq->out[no]);
167+
strm->z.next_out = kout + pofs;
168+
/* only the first output page starts at a non-zero offset */
pofs = 0;
169+
}
170+
171+
/* current input page consumed and more input remains: advance to the next one */
if (!strm->z.avail_in && insz) {
172+
if (++ni >= nrpages_in) {
173+
erofs_err(sb, "invalid compressed data");
174+
err = -EFSCORRUPTED;
175+
break;
176+
}
177+
178+
/* kout is unmapped before kin and remapped after it; j keeps the write offset across the remap */
if (kout) { /* unlike kmap(), take care of the orders */
179+
j = strm->z.next_out - kout;
180+
kunmap_local(kout);
181+
}
182+
kunmap_local(kin);
183+
strm->z.avail_in = min_t(u32, insz, PAGE_SIZE);
184+
insz -= strm->z.avail_in;
185+
kin = kmap_local_page(rq->in[ni]);
186+
strm->z.next_in = kin;
187+
bounced = false;
188+
if (kout) {
189+
kout = kmap_local_page(rq->out[no]);
190+
strm->z.next_out = kout + j;
191+
}
192+
}
193+
194+
/*
195+
* Handle overlapping: Use bounced buffer if the compressed
196+
* data is under processing; Or use short-lived pages from the
197+
* on-stack pagepool where pages share among the same request
198+
* and not _all_ inplace I/O pages are needed to be doubled.
199+
*/
200+
/* current input page is also the current output page: read from a private copy */
if (!bounced && rq->out[no] == rq->in[ni]) {
201+
memcpy(strm->bounce, strm->z.next_in, strm->z.avail_in);
202+
strm->z.next_in = strm->bounce;
203+
bounced = true;
204+
}
205+
206+
/* any later input page that is the current output page is copied aside first (j is reused as a scratch index) */
for (j = ni + 1; j < nrpages_in; ++j) {
207+
struct page *tmppage;
208+
209+
if (rq->out[no] != rq->in[j])
210+
continue;
211+
212+
DBG_BUGON(erofs_page_is_managed(EROFS_SB(sb),
213+
rq->in[j]));
214+
tmppage = erofs_allocpage(pagepool,
215+
GFP_KERNEL | __GFP_NOFAIL);
216+
set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
217+
copy_highpage(tmppage, rq->in[j]);
218+
rq->in[j] = tmppage;
219+
}
220+
221+
zerr = zlib_inflate(&strm->z, Z_SYNC_FLUSH);
222+
/* keep looping only while zlib reports Z_OK and output space remains */
if (zerr != Z_OK || !(outsz + strm->z.avail_out)) {
223+
/* output completely filled with the stream still open: fine for partial decoding */
if (zerr == Z_OK && rq->partial_decoding)
224+
break;
225+
/* stream ended after every output byte had been assigned to a page */
if (zerr == Z_STREAM_END && !outsz)
226+
break;
227+
erofs_err(sb, "failed to decompress %d in[%u] out[%u]",
228+
zerr, rq->inputsize, rq->outputsize);
229+
err = -EFSCORRUPTED;
230+
break;
231+
}
232+
}
233+
234+
/* an inflateEnd failure is reported only if nothing else went wrong */
if (zlib_inflateEnd(&strm->z) != Z_OK && !err)
235+
err = -EIO;
236+
if (kout)
237+
kunmap_local(kout);
238+
failed_zinit:
239+
kunmap_local(kin);
240+
/* 4. push back DEFLATE stream context to the global list */
241+
spin_lock(&z_erofs_deflate_lock);
242+
strm->next = z_erofs_deflate_head;
243+
z_erofs_deflate_head = strm;
244+
spin_unlock(&z_erofs_deflate_lock);
245+
/* let a decompressor sleeping in step 2 retry */
wake_up(&z_erofs_deflate_wq);
246+
return err;
247+
}

fs/erofs/erofs_fs.h

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001
1515
#define EROFS_FEATURE_COMPAT_MTIME 0x00000002
16+
#define EROFS_FEATURE_COMPAT_XATTR_FILTER 0x00000004
1617

1718
/*
1819
* Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should
@@ -81,7 +82,8 @@ struct erofs_super_block {
8182
__u8 xattr_prefix_count; /* # of long xattr name prefixes */
8283
__le32 xattr_prefix_start; /* start of long xattr prefixes */
8384
__le64 packed_nid; /* nid of the special packed inode */
84-
__u8 reserved2[24];
85+
__u8 xattr_filter_reserved; /* reserved for xattr name filter */
86+
__u8 reserved2[23];
8587
};
8688

8789
/*
@@ -200,7 +202,7 @@ struct erofs_inode_extended {
200202
* for read-only fs, no need to introduce h_refcount
201203
*/
202204
struct erofs_xattr_ibody_header {
203-
__le32 h_reserved;
205+
__le32 h_name_filter; /* bit value 1 indicates not-present */
204206
__u8 h_shared_count;
205207
__u8 h_reserved2[7];
206208
__le32 h_shared_xattrs[]; /* shared xattr id array */
@@ -221,6 +223,10 @@ struct erofs_xattr_ibody_header {
221223
#define EROFS_XATTR_LONG_PREFIX 0x80
222224
#define EROFS_XATTR_LONG_PREFIX_MASK 0x7f
223225

226+
#define EROFS_XATTR_FILTER_BITS 32
227+
#define EROFS_XATTR_FILTER_DEFAULT UINT32_MAX
228+
#define EROFS_XATTR_FILTER_SEED 0x25BBE08F
229+
224230
/* xattr entry (for both inline & shared xattrs) */
225231
struct erofs_xattr_entry {
226232
__u8 e_name_len; /* length of name */
@@ -289,6 +295,7 @@ struct erofs_dirent {
289295
/*
 * Compression algorithm identifiers. The values are used as designated
 * indices into erofs_decompressors[].
 */
enum {
290296
Z_EROFS_COMPRESSION_LZ4 = 0,
291297
Z_EROFS_COMPRESSION_LZMA = 1,
298+
Z_EROFS_COMPRESSION_DEFLATE = 2,
292299
/* one past the last algorithm; Z_EROFS_ALL_COMPR_ALGS builds its bitmask from it */
Z_EROFS_COMPRESSION_MAX
293300
};
294301
#define Z_EROFS_ALL_COMPR_ALGS ((1 << Z_EROFS_COMPRESSION_MAX) - 1)
@@ -309,6 +316,12 @@ struct z_erofs_lzma_cfgs {
309316

310317
#define Z_EROFS_LZMA_MAX_DICT_SIZE (8 * Z_EROFS_PCLUSTER_MAX_SIZE)
311318

319+
/* 6 bytes (+ length field = 8 bytes) */
320+
/*
 * DEFLATE configuration record; z_erofs_load_deflate_config() rejects a
 * record shorter than this struct or with windowbits above MAX_WBITS.
 */
struct z_erofs_deflate_cfgs {
321+
u8 windowbits; /* 8..15 for DEFLATE */
322+
/* pads the record to 6 bytes */
u8 reserved[5];
323+
} __packed;
324+
312325
/*
313326
* bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
314327
* e.g. for 4k logical cluster size, 4B if compacted 2B is off;

0 commit comments

Comments
 (0)