
Commit 94ae8c3

ematsumiya authored and smfrench committed
smb: client: compress: LZ77 code improvements/cleanup
- Check data compressibility with some heuristics (copied from btrfs):
  - should_compress() final decision is is_compressible(data)
- Clean up compress/lz77.h, leaving only lz77_compress() exposed:
  - Move parts to compress/lz77.c and remove the rest, because they were
    either unused, used only once, or implemented wrong (thanks to
    David Howells for the help)
- Update the compression parameters (still compatible with the Windows
  implementation), trading off ~20% compression ratio for ~40% performance:
  - min match len: 3 -> 4
  - max distance: 8KiB -> 1KiB
  - hash table type: u32 * -> u64 *

Known bugs:
This implementation currently works fine in general, but breaks with some
payloads used during testing. Investigation is ongoing, to be fixed in a
next commit.

Signed-off-by: Enzo Matsumiya <[email protected]>
Co-developed-by: David Howells <[email protected]>
Signed-off-by: David Howells <[email protected]>
Signed-off-by: Steve French <[email protected]>
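
To make the parameter trade-off concrete: with the new tuning, the encoder only emits a (distance, length) reference when at least 4 bytes repeat within the previous 1 KiB of input; anything shorter or farther back stays literal. A sketch of the constants involved (the macro names here are illustrative assumptions, not quoted from compress/lz77.c):

    /* Illustrative only; the real definitions live in compress/lz77.c. */
    #define LZ77_MATCH_MIN_LEN   4      /* was 3: shorter repeats stay literal */
    #define LZ77_MATCH_MAX_DIST  SZ_1K  /* was SZ_8K: matches reference at most the last 1KiB */

A smaller window plus a longer minimum match means fewer and cheaper match lookups (the ~40% performance gain) at the cost of missed matches (the ~20% ratio loss), while the emitted matches remain decodable by the Windows LZ77 implementation.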
1 parent f046d71 commit 94ae8c3

File tree

4 files changed: 532 additions, 463 deletions


fs/smb/client/compress.c

Lines changed: 353 additions & 18 deletions
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/uio.h>
+#include <linux/sort.h>
 
 #include "cifsglob.h"
 #include "../common/smb2pdu.h"
@@ -24,27 +25,361 @@
 #include "compress/lz77.h"
 #include "compress.h"
 
-int smb_compress(void *buf, const void *data, size_t *len)
+/*
+ * The heuristic_*() functions below try to determine data compressibility.
+ *
+ * Derived from fs/btrfs/compression.c, changing coding style, some parameters, and removing
+ * unused parts.
+ *
+ * Read that file for a better and more detailed explanation of the calculations.
+ *
+ * The algorithms are run on a collected sample of the input (uncompressed) data.
+ * The sample is formed of 2K reads in PAGE_SIZE intervals, with a maximum size of 4M.
+ *
+ * Parsing the sample goes from "low-hanging fruits" (fastest algorithms, likely compressible)
+ * to "need more analysis" (likely uncompressible).
+ */
+
+struct bucket {
+        unsigned int count;
+};
+
+/**
+ * calc_shannon_entropy() - Compute Shannon entropy of the sampled data.
+ * @bkt: Byte counts of the sample.
+ * @slen: Size of the sample.
+ *
+ * Return: true if the level (percentage of number of bits that would be required to
+ * compress the data) is below the minimum threshold.
+ *
+ * Note:
+ * There _is_ an entropy level here that's > 65 (minimum threshold) that would indicate a
+ * possibility of compression, but compressing, or even further analysing, it would waste so many
+ * resources that it's simply not worth it.
+ *
+ * Also, Shannon entropy is the last computed heuristic; if we got this far and ended up
+ * with uncertainty, just stay on the safe side and call it uncompressible.
+ */
+static bool calc_shannon_entropy(struct bucket *bkt, size_t slen)
+{
+        const size_t threshold = 65, max_entropy = 8 * ilog2(16);
+        size_t i, p, p2, len, sum = 0;
+
+#define pow4(n) (n * n * n * n)
+        len = ilog2(pow4(slen));
+
+        for (i = 0; i < 256 && bkt[i].count > 0; i++) {
+                p = bkt[i].count;
+                p2 = ilog2(pow4(p));
+                sum += p * (len - p2);
+        }
+
+        sum /= slen;
+
+        return ((sum * 100 / max_entropy) <= threshold);
+}
+
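
The integer arithmetic in calc_shannon_entropy() estimates Shannon entropy without floating point; pow4() buys ilog2() two extra fractional bits, since ilog2(n^4) ~= 4*log2(n). With p_i = bkt[i].count / slen, the loop computes:

    sum  = SUM_i count_i * (ilog2(slen^4) - ilog2(count_i^4))
        ~= 4 * SUM_i count_i * log2(slen / count_i)
    sum / slen ~= 4 * H,   where H = -SUM_i p_i * log2(p_i)

Since max_entropy = 8 * ilog2(16) = 32 = 4 * 8, the final test sum * 100 / max_entropy <= 65 asks whether the estimated entropy is at most ~65% of the 8 bits/byte maximum, i.e. whether an ideal entropy coder could save roughly a third of the bits.
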
+/**
+ * calc_byte_distribution() - Compute byte distribution on the sampled data.
+ * @bkt: Byte counts of the sample.
+ * @slen: Size of the sample.
+ *
+ * Return:
+ * 1: High probability (normal (Gaussian) distribution) of the data being compressible.
+ * 0: A "hard no" for compression -- either a computed uniform distribution of the bytes (e.g.
+ *    random or encrypted data), or calc_shannon_entropy() returned false (see above).
+ * 2: Inconclusive ("low < n < high"); calc_shannon_entropy() should be used for
+ *    a final decision.
+ */
+static int calc_byte_distribution(struct bucket *bkt, size_t slen)
 {
-        struct smb2_compression_hdr *hdr;
-        size_t buf_len, data_len;
+        const size_t low = 64, high = 200, threshold = slen * 90 / 100;
+        size_t sum = 0;
+        int i;
+
+        for (i = 0; i < low; i++)
+                sum += bkt[i].count;
+
+        if (sum > threshold)
+                return i;
+
+        for (; i < high && bkt[i].count > 0; i++) {
+                sum += bkt[i].count;
+                if (sum > threshold)
+                        break;
+        }
+
+        if (i <= low)
+                return 1;
+
+        if (i >= high)
+                return 0;
+
+        return 2;
+}
+
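
A worked example of the thresholds above, for a 4 KiB sample (the buckets are sorted most-frequent-first before this runs; see cmp_bkt() below): threshold = 4096 * 90 / 100 = 3686. If the 64 most frequent byte values alone cover more than 3686 of the 4096 sampled bytes, the distribution is narrow and the data is considered compressible; if even the 200 most frequent values cannot reach that mark, the distribution is close to uniform (random or encrypted data) and compression is refused; anything in between is inconclusive and falls through to calc_shannon_entropy().
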
+static bool check_ascii_bytes(const struct bucket *bkt)
+{
+        const size_t threshold = 64;
+        size_t count = 0;
+        int i;
+
+        for (i = 0; i < threshold; i++)
+                if (bkt[i].count > 0)
+                        count++;
+
+        for (; i < 256; i++) {
+                if (bkt[i].count > 0) {
+                        count++;
+                        if (count > threshold)
+                                break;
+                }
+        }
+
+        return (count < threshold);
+}
+
+static bool check_repeated_data(const u8 *sample, size_t len)
+{
+        size_t s = len / 2;
+
+        return (!memcmp(&sample[0], &sample[s], s));
+}
+
+static int cmp_bkt(const void *_a, const void *_b)
+{
+        const struct bucket *a = _a, *b = _b;
+
+        /* Reverse sort. */
+        if (a->count > b->count)
+                return -1;
+
+        return 1;
+}
+
+/*
+ * TODO:
+ * Support other iter types, if required.
+ * Only ITER_XARRAY is supported for now.
+ */
+static int collect_sample(const struct iov_iter *iter, ssize_t max, u8 *sample)
+{
+        struct folio *folios[16], *folio;
+        unsigned int nr, i, j, npages;
+        loff_t start = iter->xarray_start + iter->iov_offset;
+        pgoff_t last, index = start / PAGE_SIZE;
+        size_t len, off, foff;
+        ssize_t ret = 0;
+        void *p;
+        int s = 0;
+
+        last = (start + max - 1) / PAGE_SIZE;
+        do {
+                nr = xa_extract(iter->xarray, (void **)folios, index, last, ARRAY_SIZE(folios),
+                                XA_PRESENT);
+                if (nr == 0)
+                        return -EIO;
+
+                for (i = 0; i < nr; i++) {
+                        folio = folios[i];
+                        npages = folio_nr_pages(folio);
+                        foff = start - folio_pos(folio);
+                        off = foff % PAGE_SIZE;
+
+                        for (j = foff / PAGE_SIZE; j < npages; j++) {
+                                size_t len2;
+
+                                len = min_t(size_t, max, PAGE_SIZE - off);
+                                len2 = min_t(size_t, len, SZ_2K);
+
+                                p = kmap_local_page(folio_page(folio, j));
+                                memcpy(&sample[s], p, len2);
+                                kunmap_local(p);
+
+                                if (ret < 0)
+                                        return ret;
+
+                                s += len2;
+
+                                if (len2 < SZ_2K || s >= max - SZ_2K)
+                                        return s;
+
+                                max -= len;
+                                if (max <= 0)
+                                        return s;
+
+                                start += len;
+                                off = 0;
+                                index++;
+                        }
+                }
+        } while (nr == ARRAY_SIZE(folios));
+
+        return s;
+}
+
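
To put numbers on the sampling pattern (assuming 4 KiB pages): collect_sample() copies at most the first 2 KiB (SZ_2K) of every page backing the iterator, i.e. roughly half of each page, and stops early once the copied total comes within 2 KiB of the remaining budget (s >= max - SZ_2K) or a short copy signals the end of the data.
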
+/**
+ * is_compressible() - Determines if a chunk of data is compressible.
+ * @data: Iterator containing uncompressed data.
+ *
+ * Return:
+ * 0: @data is not compressible
+ * 1: @data is compressible
+ * -ENOMEM: failed to allocate memory for sample buffer
+ *
+ * Tests show that this function is quite reliable in predicting data compressibility,
+ * matching close to 1:1 with the behaviour of LZ77 compression successes and failures.
+ */
+static int is_compressible(const struct iov_iter *data)
+{
+        const size_t read_size = SZ_2K, bkt_size = 256, max = SZ_4M;
+        struct bucket *bkt;
+        int i = 0, ret = 0;
+        size_t len;
+        u8 *sample;
+
+        len = iov_iter_count(data);
+        if (len < read_size)
+                return 0;
+
+        if (len - read_size > max)
+                len = max;
+
+        sample = kvzalloc(len, GFP_KERNEL);
+        if (!sample)
+                return -ENOMEM;
+
+        /* Sample 2K bytes per page of the uncompressed data. */
+        ret = collect_sample(data, len, sample);
+        if (ret < 0)
+                goto out;
+
+        len = ret;
+        ret = 1;
+
+        if (check_repeated_data(sample, len))
+                goto out;
+
+        bkt = kcalloc(bkt_size, sizeof(*bkt), GFP_KERNEL);
+        if (!bkt) {
+                kvfree(sample);
+                return -ENOMEM;
+        }
+
+        for (i = 0; i < len; i++)
+                bkt[sample[i]].count++;
+
+        if (check_ascii_bytes(bkt))
+                goto out;
+
+        /* Sort in descending order */
+        sort(bkt, bkt_size, sizeof(*bkt), cmp_bkt, NULL);
+
+        ret = calc_byte_distribution(bkt, len);
+        if (ret != 2)
+                goto out;
+
+        ret = calc_shannon_entropy(bkt, len);
+out:
+        kvfree(sample);
+        kfree(bkt);
+
+        WARN(ret < 0, "%s: ret=%d\n", __func__, ret);
+
+        return !!ret;
+}
+
+bool should_compress(const struct cifs_tcon *tcon, const struct smb_rqst *rq)
+{
+        const struct smb2_hdr *shdr = rq->rq_iov->iov_base;
+
+        if (unlikely(!tcon || !tcon->ses || !tcon->ses->server))
+                return false;
+
+        if (!tcon->ses->server->compression.enabled)
+                return false;
+
+        if (!(tcon->share_flags & SMB2_SHAREFLAG_COMPRESS_DATA))
+                return false;
+
+        if (shdr->Command == SMB2_WRITE) {
+                const struct smb2_write_req *wreq = rq->rq_iov->iov_base;
+
+                if (wreq->Length < SMB_COMPRESS_MIN_LEN)
+                        return false;
+
+                return is_compressible(&rq->rq_iter);
+        }
+
+        return (shdr->Command == SMB2_READ);
+}
+
+int smb_compress(struct TCP_Server_Info *server, struct smb_rqst *rq, compress_send_fn send_fn)
+{
+        struct iov_iter iter;
+        u32 slen, dlen;
+        void *src, *dst;
         int ret;
 
-        buf_len = sizeof(struct smb2_write_req);
-        data_len = *len;
-        *len = 0;
-
-        hdr = buf;
-        hdr->ProtocolId = SMB2_COMPRESSION_TRANSFORM_ID;
-        hdr->OriginalCompressedSegmentSize = cpu_to_le32(data_len);
-        hdr->Offset = cpu_to_le32(buf_len);
-        hdr->Flags = SMB2_COMPRESSION_FLAG_NONE;
-        hdr->CompressionAlgorithm = SMB3_COMPRESS_LZ77;
-
-        /* XXX: add other algs here as they're implemented */
-        ret = lz77_compress(data, data_len, buf + SMB_COMPRESS_HDR_LEN + buf_len, &data_len);
-        if (!ret)
-                *len = SMB_COMPRESS_HDR_LEN + buf_len + data_len;
+        if (!server || !rq || !rq->rq_iov || !rq->rq_iov->iov_base)
+                return -EINVAL;
+
+        if (rq->rq_iov->iov_len != sizeof(struct smb2_write_req))
+                return -EINVAL;
+
+        slen = iov_iter_count(&rq->rq_iter);
+        src = kvzalloc(slen, GFP_KERNEL);
+        if (!src) {
+                ret = -ENOMEM;
+                goto err_free;
+        }
+
+        /* Keep the original iter intact. */
+        iter = rq->rq_iter;
+
+        if (!copy_from_iter_full(src, slen, &iter)) {
+                ret = -EIO;
+                goto err_free;
+        }
+
+        /*
+         * This is just overprovisioning, as the algorithm will error out if @dst reaches 7/8
+         * of @slen.
+         */
+        dlen = slen;
+        dst = kvzalloc(dlen, GFP_KERNEL);
+        if (!dst) {
+                ret = -ENOMEM;
+                goto err_free;
+        }
+
+        ret = lz77_compress(src, slen, dst, &dlen);
+        if (!ret) {
+                struct smb2_compression_hdr hdr = { 0 };
+                struct smb_rqst comp_rq = { .rq_nvec = 3, };
+                struct kvec iov[3];
+
+                hdr.ProtocolId = SMB2_COMPRESSION_TRANSFORM_ID;
+                hdr.OriginalCompressedSegmentSize = cpu_to_le32(slen);
+                hdr.CompressionAlgorithm = SMB3_COMPRESS_LZ77;
+                hdr.Flags = SMB2_COMPRESSION_FLAG_NONE;
+                hdr.Offset = cpu_to_le32(rq->rq_iov[0].iov_len);
+
+                iov[0].iov_base = &hdr;
+                iov[0].iov_len = sizeof(hdr);
+                iov[1] = rq->rq_iov[0];
+                iov[2].iov_base = dst;
+                iov[2].iov_len = dlen;
+
+                comp_rq.rq_iov = iov;
+
+                ret = send_fn(server, 1, &comp_rq);
+        } else if (ret == -EMSGSIZE || dlen >= slen) {
+                ret = send_fn(server, 1, rq);
+        }
+err_free:
+        kvfree(dst);
+        kvfree(src);
 
         return ret;
 }
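
For reference, the three kvecs assembled above lay a compressed write out on the wire as follows (a sketch read off the code above, not a restatement of the protocol spec):

    /*
     * iov[0]: struct smb2_compression_hdr
     *         ProtocolId = SMB2_COMPRESSION_TRANSFORM_ID, Flags = NONE,
     *         OriginalCompressedSegmentSize = slen,
     *         Offset = rq->rq_iov[0].iov_len (bytes that follow uncompressed)
     * iov[1]: the original SMB2 WRITE request header, sent as-is
     * iov[2]: dlen bytes of LZ77-compressed payload
     */

If lz77_compress() fails with -EMSGSIZE (output reached the 7/8 cutoff) or compression did not actually shrink the payload (dlen >= slen), the original uncompressed request is sent instead.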
