Commit 47e9bff

Merge tag 'erofs-for-6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs
Pull erofs updates from Gao Xiang:
 "The LZ4 global buffer count is now configurable instead of the previous
  per-CPU buffers, which is useful for bare-metal machines with hundreds of
  CPUs. A reserved buffer pool for LZ4 decompression can also be enabled to
  minimize tail allocation latencies in low-memory scenarios with heavy
  memory pressure.

  In addition, the Zstandard algorithm is now supported as an alternative,
  since it has been requested by users for a while.

  There are some random cleanups as usual. Summary:

   - Make LZ4 global buffers configurable instead of per-CPU buffers

   - Add a reserved buffer pool for LZ4 decompression for lower latencies

   - Support Zstandard compression algorithm as an alternative

   - Derive fsid from on-disk UUID for .statfs() if possible

   - Minor cleanups"

* tag 'erofs-for-6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: Zstandard compression support
  erofs: clean up z_erofs_load_full_lcluster()
  erofs: derive fsid from on-disk UUID for .statfs() if possible
  erofs: add a reserved buffer pool for lz4 decompression
  erofs: do not use pagepool in z_erofs_gbuf_growsize()
  erofs: rename per-CPU buffers to global buffer pool and make it configurable
  erofs: rename utils.c to zutil.c
2 parents 1b10b39 + 7c35de4 commit 47e9bff

11 files changed, +555 -212 lines changed

fs/erofs/Kconfig

Lines changed: 15 additions & 0 deletions
@@ -112,6 +112,21 @@ config EROFS_FS_ZIP_DEFLATE
 
 	  If unsure, say N.
 
+config EROFS_FS_ZIP_ZSTD
+	bool "EROFS Zstandard compressed data support"
+	depends on EROFS_FS_ZIP
+	select ZSTD_DECOMPRESS
+	help
+	  Saying Y here includes support for reading EROFS file systems
+	  containing Zstandard compressed data. It gives better compression
+	  ratios than the default LZ4 format, while it costs more CPU
+	  overhead.
+
+	  Zstandard support is an experimental feature for now and so most
+	  file systems will be readable without selecting this option.
+
+	  If unsure, say N.
+
 config EROFS_FS_ONDEMAND
 	bool "EROFS fscache-based on-demand read support"
 	depends on EROFS_FS
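
Enabling the new option requires CONFIG_EROFS_FS_ZIP (and hence CONFIG_EROFS_FS); ZSTD_DECOMPRESS is selected automatically. A minimal .config fragment might look like the following; whether these end up built-in (=y) or modular (=m) depends on the rest of your configuration:

CONFIG_EROFS_FS=y
CONFIG_EROFS_FS_ZIP=y
CONFIG_EROFS_FS_ZIP_ZSTD=y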

fs/erofs/Makefile

Lines changed: 3 additions & 2 deletions
@@ -1,9 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
 obj-$(CONFIG_EROFS_FS) += erofs.o
-erofs-objs := super.o inode.o data.o namei.o dir.o utils.o sysfs.o
+erofs-objs := super.o inode.o data.o namei.o dir.o sysfs.o
 erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
-erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o pcpubuf.o
+erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o zutil.o
 erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
 erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o
+erofs-$(CONFIG_EROFS_FS_ZIP_ZSTD) += decompressor_zstd.o
 erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o

fs/erofs/compress.h

Lines changed: 4 additions & 0 deletions
@@ -90,8 +90,12 @@ int z_erofs_load_lzma_config(struct super_block *sb,
 			    struct erofs_super_block *dsb, void *data, int size);
 int z_erofs_load_deflate_config(struct super_block *sb,
 			    struct erofs_super_block *dsb, void *data, int size);
+int z_erofs_load_zstd_config(struct super_block *sb,
+			    struct erofs_super_block *dsb, void *data, int size);
 int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
 			    struct page **pagepool);
 int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
 			    struct page **pagepool);
+int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq,
+			    struct page **pgpl);
 #endif

fs/erofs/decompressor.c

Lines changed: 11 additions & 4 deletions
@@ -54,7 +54,7 @@ static int z_erofs_load_lz4_config(struct super_block *sb,
 	sbi->lz4.max_distance_pages = distance ?
 			DIV_ROUND_UP(distance, PAGE_SIZE) + 1 :
 			LZ4_MAX_DISTANCE_PAGES;
-	return erofs_pcpubuf_growsize(sbi->lz4.max_pclusterblks);
+	return z_erofs_gbuf_growsize(sbi->lz4.max_pclusterblks);
 }
 
 /*
@@ -111,7 +111,7 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
 			victim = availables[--top];
 			get_page(victim);
 		} else {
-			victim = erofs_allocpage(pagepool, rq->gfp);
+			victim = __erofs_allocpage(pagepool, rq->gfp, true);
 			if (!victim)
 				return -ENOMEM;
 			set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
@@ -159,7 +159,7 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
 docopy:
 	/* Or copy compressed data which can be overlapped to per-CPU buffer */
 	in = rq->in;
-	src = erofs_get_pcpubuf(ctx->inpages);
+	src = z_erofs_get_gbuf(ctx->inpages);
 	if (!src) {
 		DBG_BUGON(1);
 		kunmap_local(inpage);
@@ -260,7 +260,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
 	} else if (maptype == 1) {
 		vm_unmap_ram(src, ctx->inpages);
 	} else if (maptype == 2) {
-		erofs_put_pcpubuf(src);
+		z_erofs_put_gbuf(src);
 	} else if (maptype != 3) {
 		DBG_BUGON(1);
 		return -EFAULT;
@@ -399,6 +399,13 @@ const struct z_erofs_decompressor erofs_decompressors[] = {
 		.name = "deflate"
 	},
 #endif
+#ifdef CONFIG_EROFS_FS_ZIP_ZSTD
+	[Z_EROFS_COMPRESSION_ZSTD] = {
+		.config = z_erofs_load_zstd_config,
+		.decompress = z_erofs_zstd_decompress,
+		.name = "zstd"
+	},
+#endif
 };
 
 int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb)
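
The last hunk above registers the Zstandard entry in erofs_decompressors[], an array of per-algorithm operations indexed by the on-disk algorithm id using C designated initializers. The standalone sketch below illustrates that dispatch pattern with purely hypothetical names; it is not erofs code.

/* Illustration only: enum-indexed operations table, as in erofs_decompressors[]. */
#include <stdio.h>

enum demo_alg { DEMO_LZ4, DEMO_DEFLATE, DEMO_ZSTD, DEMO_MAX };

struct demo_decompressor {
	int (*decompress)(const char *in, char *out);
	const char *name;
};

static int demo_zstd_decompress(const char *in, char *out)
{
	/* real decompression would happen here */
	return 0;
}

static const struct demo_decompressor demo_decompressors[DEMO_MAX] = {
	[DEMO_ZSTD] = {
		.decompress = demo_zstd_decompress,
		.name = "zstd",
	},
};

int main(void)
{
	enum demo_alg alg = DEMO_ZSTD;	/* as read from on-disk metadata */
	const struct demo_decompressor *d = &demo_decompressors[alg];

	if (!d->decompress) {		/* algorithm id not compiled in */
		fprintf(stderr, "algorithm %d unsupported\n", alg);
		return 1;
	}
	printf("dispatching to %s\n", d->name);
	return d->decompress(NULL, NULL);
}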

fs/erofs/decompressor_zstd.c

Lines changed: 279 additions & 0 deletions
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/zstd.h>
+#include "compress.h"
+
+struct z_erofs_zstd {
+	struct z_erofs_zstd *next;
+	u8 bounce[PAGE_SIZE];
+	void *wksp;
+	unsigned int wkspsz;
+};
+
+static DEFINE_SPINLOCK(z_erofs_zstd_lock);
+static unsigned int z_erofs_zstd_max_dictsize;
+static unsigned int z_erofs_zstd_nstrms, z_erofs_zstd_avail_strms;
+static struct z_erofs_zstd *z_erofs_zstd_head;
+static DECLARE_WAIT_QUEUE_HEAD(z_erofs_zstd_wq);
+
+module_param_named(zstd_streams, z_erofs_zstd_nstrms, uint, 0444);
+
+static struct z_erofs_zstd *z_erofs_isolate_strms(bool all)
+{
+	struct z_erofs_zstd *strm;
+
+again:
+	spin_lock(&z_erofs_zstd_lock);
+	strm = z_erofs_zstd_head;
+	if (!strm) {
+		spin_unlock(&z_erofs_zstd_lock);
+		wait_event(z_erofs_zstd_wq, READ_ONCE(z_erofs_zstd_head));
+		goto again;
+	}
+	z_erofs_zstd_head = all ? NULL : strm->next;
+	spin_unlock(&z_erofs_zstd_lock);
+	return strm;
+}
+
+void z_erofs_zstd_exit(void)
+{
+	while (z_erofs_zstd_avail_strms) {
+		struct z_erofs_zstd *strm, *n;
+
+		for (strm = z_erofs_isolate_strms(true); strm; strm = n) {
+			n = strm->next;
+
+			kvfree(strm->wksp);
+			kfree(strm);
+			--z_erofs_zstd_avail_strms;
+		}
+	}
+}
+
+int __init z_erofs_zstd_init(void)
+{
+	/* by default, use # of possible CPUs instead */
+	if (!z_erofs_zstd_nstrms)
+		z_erofs_zstd_nstrms = num_possible_cpus();
+
+	for (; z_erofs_zstd_avail_strms < z_erofs_zstd_nstrms;
+	     ++z_erofs_zstd_avail_strms) {
+		struct z_erofs_zstd *strm;
+
+		strm = kzalloc(sizeof(*strm), GFP_KERNEL);
+		if (!strm) {
+			z_erofs_zstd_exit();
+			return -ENOMEM;
+		}
+		spin_lock(&z_erofs_zstd_lock);
+		strm->next = z_erofs_zstd_head;
+		z_erofs_zstd_head = strm;
+		spin_unlock(&z_erofs_zstd_lock);
+	}
+	return 0;
+}
+
+int z_erofs_load_zstd_config(struct super_block *sb,
+			     struct erofs_super_block *dsb, void *data, int size)
+{
+	static DEFINE_MUTEX(zstd_resize_mutex);
+	struct z_erofs_zstd_cfgs *zstd = data;
+	unsigned int dict_size, wkspsz;
+	struct z_erofs_zstd *strm, *head = NULL;
+	void *wksp;
+
+	if (!zstd || size < sizeof(struct z_erofs_zstd_cfgs) || zstd->format) {
+		erofs_err(sb, "unsupported zstd format, size=%u", size);
+		return -EINVAL;
+	}
+
+	if (zstd->windowlog > ilog2(Z_EROFS_ZSTD_MAX_DICT_SIZE) - 10) {
+		erofs_err(sb, "unsupported zstd window log %u", zstd->windowlog);
+		return -EINVAL;
+	}
+	dict_size = 1U << (zstd->windowlog + 10);
+
+	/* in case 2 z_erofs_load_zstd_config() race to avoid deadlock */
+	mutex_lock(&zstd_resize_mutex);
+	if (z_erofs_zstd_max_dictsize >= dict_size) {
+		mutex_unlock(&zstd_resize_mutex);
+		return 0;
+	}
+
+	/* 1. collect/isolate all streams for the following check */
+	while (z_erofs_zstd_avail_strms) {
+		struct z_erofs_zstd *n;
+
+		for (strm = z_erofs_isolate_strms(true); strm; strm = n) {
+			n = strm->next;
+			strm->next = head;
+			head = strm;
+			--z_erofs_zstd_avail_strms;
+		}
+	}
+
+	/* 2. walk each isolated stream and grow max dict_size if needed */
+	wkspsz = zstd_dstream_workspace_bound(dict_size);
+	for (strm = head; strm; strm = strm->next) {
+		wksp = kvmalloc(wkspsz, GFP_KERNEL);
+		if (!wksp)
+			break;
+		kvfree(strm->wksp);
+		strm->wksp = wksp;
+		strm->wkspsz = wkspsz;
+	}
+
+	/* 3. push back all to the global list and update max dict_size */
+	spin_lock(&z_erofs_zstd_lock);
+	DBG_BUGON(z_erofs_zstd_head);
+	z_erofs_zstd_head = head;
+	spin_unlock(&z_erofs_zstd_lock);
+	z_erofs_zstd_avail_strms = z_erofs_zstd_nstrms;
+	wake_up_all(&z_erofs_zstd_wq);
+	if (!strm)
+		z_erofs_zstd_max_dictsize = dict_size;
+	mutex_unlock(&zstd_resize_mutex);
+	return strm ? -ENOMEM : 0;
+}
+
+int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq,
+			    struct page **pgpl)
+{
+	const unsigned int nrpages_out =
+		PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
+	const unsigned int nrpages_in =
+		PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
+	zstd_dstream *stream;
+	struct super_block *sb = rq->sb;
+	unsigned int insz, outsz, pofs;
+	struct z_erofs_zstd *strm;
+	zstd_in_buffer in_buf = { NULL, 0, 0 };
+	zstd_out_buffer out_buf = { NULL, 0, 0 };
+	u8 *kin, *kout = NULL;
+	bool bounced = false;
+	int no = -1, ni = 0, j = 0, zerr, err;
+
+	/* 1. get the exact compressed size */
+	kin = kmap_local_page(*rq->in);
+	err = z_erofs_fixup_insize(rq, kin + rq->pageofs_in,
+				   min_t(unsigned int, rq->inputsize,
+					 sb->s_blocksize - rq->pageofs_in));
+	if (err) {
+		kunmap_local(kin);
+		return err;
+	}
+
+	/* 2. get an available ZSTD context */
+	strm = z_erofs_isolate_strms(false);
+
+	/* 3. multi-call decompress */
+	insz = rq->inputsize;
+	outsz = rq->outputsize;
+	stream = zstd_init_dstream(z_erofs_zstd_max_dictsize, strm->wksp, strm->wkspsz);
+	if (!stream) {
+		err = -EIO;
+		goto failed_zinit;
+	}
+
+	pofs = rq->pageofs_out;
+	in_buf.size = min_t(u32, insz, PAGE_SIZE - rq->pageofs_in);
+	insz -= in_buf.size;
+	in_buf.src = kin + rq->pageofs_in;
+	do {
+		if (out_buf.size == out_buf.pos) {
+			if (++no >= nrpages_out || !outsz) {
+				erofs_err(sb, "insufficient space for decompressed data");
+				err = -EFSCORRUPTED;
+				break;
+			}
+
+			if (kout)
+				kunmap_local(kout);
+			out_buf.size = min_t(u32, outsz, PAGE_SIZE - pofs);
+			outsz -= out_buf.size;
+			if (!rq->out[no]) {
+				rq->out[no] = erofs_allocpage(pgpl, rq->gfp);
+				if (!rq->out[no]) {
+					kout = NULL;
+					err = -ENOMEM;
+					break;
+				}
+				set_page_private(rq->out[no],
+						 Z_EROFS_SHORTLIVED_PAGE);
+			}
+			kout = kmap_local_page(rq->out[no]);
+			out_buf.dst = kout + pofs;
+			out_buf.pos = 0;
+			pofs = 0;
+		}
+
+		if (in_buf.size == in_buf.pos && insz) {
+			if (++ni >= nrpages_in) {
+				erofs_err(sb, "invalid compressed data");
+				err = -EFSCORRUPTED;
+				break;
+			}
+
+			if (kout) /* unlike kmap(), take care of the orders */
+				kunmap_local(kout);
+			kunmap_local(kin);
+			in_buf.size = min_t(u32, insz, PAGE_SIZE);
+			insz -= in_buf.size;
+			kin = kmap_local_page(rq->in[ni]);
+			in_buf.src = kin;
+			in_buf.pos = 0;
+			bounced = false;
+			if (kout) {
+				j = (u8 *)out_buf.dst - kout;
+				kout = kmap_local_page(rq->out[no]);
+				out_buf.dst = kout + j;
+			}
+		}
+
+		/*
+		 * Handle overlapping: Use bounced buffer if the compressed
+		 * data is under processing; Or use short-lived pages from the
+		 * on-stack pagepool where pages share among the same request
+		 * and not _all_ inplace I/O pages are needed to be doubled.
+		 */
+		if (!bounced && rq->out[no] == rq->in[ni]) {
+			memcpy(strm->bounce, in_buf.src, in_buf.size);
+			in_buf.src = strm->bounce;
+			bounced = true;
+		}
+
+		for (j = ni + 1; j < nrpages_in; ++j) {
+			struct page *tmppage;
+
+			if (rq->out[no] != rq->in[j])
+				continue;
+			tmppage = erofs_allocpage(pgpl, rq->gfp);
+			if (!tmppage) {
+				err = -ENOMEM;
+				goto failed;
+			}
+			set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
+			copy_highpage(tmppage, rq->in[j]);
+			rq->in[j] = tmppage;
+		}
+		zerr = zstd_decompress_stream(stream, &out_buf, &in_buf);
+		if (zstd_is_error(zerr) || (!zerr && outsz)) {
+			erofs_err(sb, "failed to decompress in[%u] out[%u]: %s",
+				  rq->inputsize, rq->outputsize,
+				  zerr ? zstd_get_error_name(zerr) : "unexpected end of stream");
+			err = -EFSCORRUPTED;
+			break;
+		}
+	} while (outsz || out_buf.pos < out_buf.size);
+failed:
+	if (kout)
+		kunmap_local(kout);
+failed_zinit:
+	kunmap_local(kin);
+	/* 4. push back ZSTD stream context to the global list */
+	spin_lock(&z_erofs_zstd_lock);
+	strm->next = z_erofs_zstd_head;
+	z_erofs_zstd_head = strm;
+	spin_unlock(&z_erofs_zstd_lock);
+	wake_up(&z_erofs_zstd_wq);
+	return err;
+}
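
The new decompressor drives the in-kernel zstd streaming API (zstd_dstream_workspace_bound(), zstd_init_dstream(), zstd_decompress_stream()) across per-page input and output windows. The minimal, self-contained sketch below shows the same API on a single contiguous source and destination buffer; the helper name, its arguments and the use of vmalloc() for the workspace are illustrative assumptions, not erofs code.

/* Illustration only: one-shot buffer-to-buffer decompression with the
 * in-kernel zstd streaming interface.
 */
#include <linux/errno.h>
#include <linux/printk.h>
#include <linux/vmalloc.h>
#include <linux/zstd.h>

static int demo_zstd_decompress_buf(const void *src, size_t srcsz,
				    void *dst, size_t dstsz,
				    size_t max_window_size)
{
	size_t wkspsz = zstd_dstream_workspace_bound(max_window_size);
	zstd_in_buffer in = { .src = src, .size = srcsz, .pos = 0 };
	zstd_out_buffer out = { .dst = dst, .size = dstsz, .pos = 0 };
	zstd_dstream *stream;
	void *wksp;
	size_t zerr;
	int err = 0;

	wksp = vmalloc(wkspsz);
	if (!wksp)
		return -ENOMEM;

	stream = zstd_init_dstream(max_window_size, wksp, wkspsz);
	if (!stream) {
		vfree(wksp);
		return -EIO;
	}

	do {
		/* each call consumes some input and/or produces some output */
		zerr = zstd_decompress_stream(stream, &out, &in);
		if (zstd_is_error(zerr)) {
			pr_err("zstd: %s\n", zstd_get_error_name(zerr));
			err = -EIO;
			break;
		}
		if (zerr && in.pos == in.size) {	/* truncated frame */
			err = -EIO;
			break;
		}
		if (zerr && out.pos == out.size) {	/* dst too small */
			err = -ENOSPC;
			break;
		}
	} while (zerr);	/* zero: frame fully decoded and flushed */

	vfree(wksp);
	return err;
}

Unlike this sketch, z_erofs_zstd_decompress() above cannot map all pages at once, so it refills in_buf/out_buf one page at a time with kmap_local_page() and handles in-place I/O overlap via the per-stream bounce buffer.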
