Skip to content

Commit 5f2b6a9

Browse files
choppsv1 authored and klassert committed
xfrm: iptfs: add skb-fragment sharing code
Avoid copying the inner packet data by sharing the skb data fragments from the output packet skb into new inner packet skb. Signed-off-by: Christian Hopps <[email protected]> Tested-by: Antony Antony <[email protected]> Signed-off-by: Steffen Klassert <[email protected]>
1 parent 3f33398 commit 5f2b6a9

File tree

1 file changed

+290
-6
lines changed

1 file changed

+290
-6
lines changed

net/xfrm/xfrm_iptfs.c

Lines changed: 290 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@
8181
#define XFRM_IPTFS_MIN_L3HEADROOM 128
8282
#define XFRM_IPTFS_MIN_L2HEADROOM (L1_CACHE_BYTES > 64 ? 64 : 64 + 16)
8383

84+
/* Minimum inner packet length for which we try to share the outer iptfs skb
 * data rather than copying it into a new skb.
 */
85+
#define IPTFS_PKT_SHARE_MIN 129
86+
8487
#define NSECS_IN_USEC 1000
8588

8689
#define IPTFS_HRTIMER_MODE HRTIMER_MODE_REL_SOFT
@@ -234,10 +237,254 @@ static void iptfs_skb_head_to_frag(const struct sk_buff *skb, skb_frag_t *frag)
234237
skb_frag_fill_page_desc(frag, page, skb->data - addr, skb_headlen(skb));
235238
}
236239

240+
/**
241+
* struct iptfs_skb_frag_walk - use to track a walk through fragments
242+
* @fragi: current fragment index
243+
* @past: length of data in fragments before @fragi
244+
* @total: length of data in all fragments
245+
* @nr_frags: number of fragments present in array
246+
* @initial_offset: the value passed in to skb_prepare_frag_walk()
247+
* @frags: the page fragments inc. room for head page
248+
* @pp_recycle: copy of skb->pp_recycle
249+
*/
250+
struct iptfs_skb_frag_walk {
251+
u32 fragi;
252+
u32 past;
253+
u32 total;
254+
u32 nr_frags;
255+
u32 initial_offset;
256+
skb_frag_t frags[MAX_SKB_FRAGS + 1];
257+
bool pp_recycle;
258+
};
259+
260+
/**
261+
* iptfs_skb_prepare_frag_walk() - initialize a frag walk over an skb.
262+
* @skb: the skb to walk.
263+
* @initial_offset: start the walk @initial_offset into the skb.
264+
* @walk: the walk to initialize
265+
*
266+
* Future calls to skb_add_frags() will expect the @offset value to be at
267+
* least @initial_offset large.
268+
*/
269+
static void iptfs_skb_prepare_frag_walk(struct sk_buff *skb, u32 initial_offset,
270+
struct iptfs_skb_frag_walk *walk)
271+
{
272+
struct skb_shared_info *shinfo = skb_shinfo(skb);
273+
skb_frag_t *frag, *from;
274+
u32 i;
275+
276+
walk->initial_offset = initial_offset;
277+
walk->fragi = 0;
278+
walk->past = 0;
279+
walk->total = 0;
280+
walk->nr_frags = 0;
281+
walk->pp_recycle = skb->pp_recycle;
282+
283+
if (skb->head_frag) {
284+
if (initial_offset >= skb_headlen(skb)) {
285+
initial_offset -= skb_headlen(skb);
286+
} else {
287+
frag = &walk->frags[walk->nr_frags++];
288+
iptfs_skb_head_to_frag(skb, frag);
289+
frag->offset += initial_offset;
290+
frag->len -= initial_offset;
291+
walk->total += frag->len;
292+
initial_offset = 0;
293+
}
294+
} else {
295+
initial_offset -= skb_headlen(skb);
296+
}
297+
298+
for (i = 0; i < shinfo->nr_frags; i++) {
299+
from = &shinfo->frags[i];
300+
if (initial_offset >= from->len) {
301+
initial_offset -= from->len;
302+
continue;
303+
}
304+
frag = &walk->frags[walk->nr_frags++];
305+
*frag = *from;
306+
if (initial_offset) {
307+
frag->offset += initial_offset;
308+
frag->len -= initial_offset;
309+
initial_offset = 0;
310+
}
311+
walk->total += frag->len;
312+
}
313+
}
314+
315+
/**
 * iptfs_skb_reset_frag_walk() - position a walk on the fragment holding @offset
 * @walk: the walk to reposition.
 * @offset: offset from the beginning of the original skb; expected to be at
 *	least @walk->initial_offset.
 *
 * Moves @walk->fragi (and the matching @walk->past) backwards or forwards
 * until the current fragment is the one containing @offset.
 *
 * The forward search never steps beyond @walk->nr_frags, so entries of
 * @walk->frags that were never filled in are not read. If @offset lies at or
 * beyond the end of the walk, @walk->fragi is left equal to @walk->nr_frags
 * and callers that loop while fragi < nr_frags then have nothing to process.
 *
 * Return: @offset made relative to the start of the current fragment.
 */
static u32 iptfs_skb_reset_frag_walk(struct iptfs_skb_frag_walk *walk,
				     u32 offset)
{
	/* Adjust offset to refer to internal walk values */
	offset -= walk->initial_offset;

	/* Rewind while the wanted offset is before the current fragment */
	while (offset < walk->past)
		walk->past -= walk->frags[--walk->fragi].len;

	/* Advance while the wanted offset is after the current fragment,
	 * stopping at the end of the valid fragment array.
	 */
	while (walk->fragi < walk->nr_frags &&
	       offset >= walk->past + walk->frags[walk->fragi].len)
		walk->past += walk->frags[walk->fragi++].len;

	/* offset now relative to this current frag */
	return offset - walk->past;
}
334+
335+
/**
336+
* iptfs_skb_can_add_frags() - check if ok to add frags from walk to skb
337+
* @skb: skb to check for adding frags to
338+
* @walk: the walk that will be used as source for frags.
339+
* @offset: offset from beginning of original skb to start from.
340+
* @len: amount of data to add frag references to in @skb.
341+
*
342+
* Return: true if ok to add frags.
343+
*/
344+
static bool iptfs_skb_can_add_frags(const struct sk_buff *skb,
345+
struct iptfs_skb_frag_walk *walk,
346+
u32 offset, u32 len)
347+
{
348+
struct skb_shared_info *shinfo = skb_shinfo(skb);
349+
u32 fragi, nr_frags, fraglen;
350+
351+
if (skb_has_frag_list(skb) || skb->pp_recycle != walk->pp_recycle)
352+
return false;
353+
354+
/* Make offset relative to current frag after setting that */
355+
offset = iptfs_skb_reset_frag_walk(walk, offset);
356+
357+
/* Verify we have array space for the fragments we need to add */
358+
fragi = walk->fragi;
359+
nr_frags = shinfo->nr_frags;
360+
while (len && fragi < walk->nr_frags) {
361+
skb_frag_t *frag = &walk->frags[fragi];
362+
363+
fraglen = frag->len;
364+
if (offset) {
365+
fraglen -= offset;
366+
offset = 0;
367+
}
368+
if (++nr_frags > MAX_SKB_FRAGS)
369+
return false;
370+
if (len <= fraglen)
371+
return true;
372+
len -= fraglen;
373+
fragi++;
374+
}
375+
/* We may not copy all @len but what we have will fit. */
376+
return true;
377+
}
378+
379+
/**
380+
* iptfs_skb_add_frags() - add a range of fragment references into an skb
381+
* @skb: skb to add references into
382+
* @walk: the walk to add referenced fragments from.
383+
* @offset: offset from beginning of original skb to start from.
384+
* @len: amount of data to add frag references to in @skb.
385+
*
386+
* iptfs_skb_can_add_frags() should be called before this function to verify
387+
* that the destination @skb is compatible with the walk and has space in the
388+
* array for the to be added frag references.
389+
*
390+
* Return: The number of bytes not added to @skb b/c we reached the end of the
391+
* walk before adding all of @len.
392+
*/
393+
static int iptfs_skb_add_frags(struct sk_buff *skb,
394+
struct iptfs_skb_frag_walk *walk, u32 offset,
395+
u32 len)
396+
{
397+
struct skb_shared_info *shinfo = skb_shinfo(skb);
398+
u32 fraglen;
399+
400+
if (!walk->nr_frags || offset >= walk->total + walk->initial_offset)
401+
return len;
402+
403+
/* make offset relative to current frag after setting that */
404+
offset = iptfs_skb_reset_frag_walk(walk, offset);
405+
406+
while (len && walk->fragi < walk->nr_frags) {
407+
skb_frag_t *frag = &walk->frags[walk->fragi];
408+
skb_frag_t *tofrag = &shinfo->frags[shinfo->nr_frags];
409+
410+
*tofrag = *frag;
411+
if (offset) {
412+
tofrag->offset += offset;
413+
tofrag->len -= offset;
414+
offset = 0;
415+
}
416+
__skb_frag_ref(tofrag);
417+
shinfo->nr_frags++;
418+
419+
/* see if we are done */
420+
fraglen = tofrag->len;
421+
if (len < fraglen) {
422+
tofrag->len = len;
423+
skb->len += len;
424+
skb->data_len += len;
425+
return 0;
426+
}
427+
/* advance to next source fragment */
428+
len -= fraglen; /* careful, use dst bv_len */
429+
skb->len += fraglen; /* careful, " " " */
430+
skb->data_len += fraglen; /* careful, " " " */
431+
walk->past += frag->len; /* careful, use src bv_len */
432+
walk->fragi++;
433+
}
434+
return len;
435+
}
436+
237437
/* ================================== */
238438
/* IPTFS Receiving (egress) Functions */
239439
/* ================================== */
240440

441+
/**
442+
* iptfs_pskb_add_frags() - Create and add frags into a new sk_buff.
443+
* @tpl: template to create new skb from.
444+
* @walk: The source for fragments to add.
445+
* @off: The offset into @walk to add frags from, also used with @st and
446+
* @copy_len.
447+
* @len: The length of data to add covering frags from @walk into @skb.
448+
* This must be <= @skblen.
449+
* @st: The sequence state to copy from into the new head skb.
450+
* @copy_len: Copy @copy_len bytes from @st at offset @off into the new skb
451+
* linear space.
452+
*
453+
* Create a new sk_buff `skb` using the template @tpl. Copy @copy_len bytes from
454+
* @st into the new skb linear space, and then add shared fragments from the
455+
* frag walk for the remaining @len of data (i.e., @len - @copy_len bytes).
456+
*
457+
* Return: The newly allocated sk_buff `skb` or NULL if an error occurs.
458+
*/
459+
static struct sk_buff *
460+
iptfs_pskb_add_frags(struct sk_buff *tpl, struct iptfs_skb_frag_walk *walk,
461+
u32 off, u32 len, struct skb_seq_state *st, u32 copy_len)
462+
{
463+
struct sk_buff *skb;
464+
465+
skb = iptfs_alloc_skb(tpl, copy_len, false);
466+
if (!skb)
467+
return NULL;
468+
469+
/* this should not normally be happening */
470+
if (!iptfs_skb_can_add_frags(skb, walk, off + copy_len,
471+
len - copy_len)) {
472+
kfree_skb(skb);
473+
return NULL;
474+
}
475+
476+
if (copy_len &&
477+
skb_copy_seq_read(st, off, skb_put(skb, copy_len), copy_len)) {
478+
XFRM_INC_STATS(dev_net(st->root_skb->dev),
479+
LINUX_MIB_XFRMINERROR);
480+
kfree_skb(skb);
481+
return NULL;
482+
}
483+
484+
iptfs_skb_add_frags(skb, walk, off + copy_len, len - copy_len);
485+
return skb;
486+
}
487+
241488
/**
242489
* iptfs_pskb_extract_seq() - Create and load data into a new sk_buff.
243490
* @skblen: the total data size for `skb`.
@@ -423,6 +670,8 @@ static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq,
423670
struct skb_seq_state *st, struct sk_buff *skb,
424671
u32 data, u32 blkoff, struct list_head *list)
425672
{
673+
struct iptfs_skb_frag_walk _fragwalk;
674+
struct iptfs_skb_frag_walk *fragwalk = NULL;
426675
struct sk_buff *newskb = xtfs->ra_newskb;
427676
u32 remaining = skb->len - data;
428677
u32 runtlen = xtfs->ra_runtlen;
@@ -567,10 +816,26 @@ static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq,
567816
fraglen = min(blkoff, remaining);
568817
copylen = min(fraglen, ipremain);
569818

570-
/* copy fragment data into newskb */
571-
if (skb_copy_seq_read(st, data, skb_put(newskb, copylen), copylen)) {
572-
XFRM_INC_STATS(dev_net(skb->dev), LINUX_MIB_XFRMINBUFFERERROR);
573-
goto abandon;
819+
/* If we may have the opportunity to share prepare a fragwalk. */
820+
if (!skb_has_frag_list(skb) && !skb_has_frag_list(newskb) &&
821+
(skb->head_frag || skb->len == skb->data_len) &&
822+
skb->pp_recycle == newskb->pp_recycle) {
823+
fragwalk = &_fragwalk;
824+
iptfs_skb_prepare_frag_walk(skb, data, fragwalk);
825+
}
826+
827+
/* Try share then copy. */
828+
if (fragwalk &&
829+
iptfs_skb_can_add_frags(newskb, fragwalk, data, copylen)) {
830+
iptfs_skb_add_frags(newskb, fragwalk, data, copylen);
831+
} else {
832+
/* copy fragment data into newskb */
833+
if (skb_copy_seq_read(st, data, skb_put(newskb, copylen),
834+
copylen)) {
835+
XFRM_INC_STATS(xs_net(xtfs->x),
836+
LINUX_MIB_XFRMINBUFFERERROR);
837+
goto abandon;
838+
}
574839
}
575840

576841
if (copylen < ipremain) {
@@ -601,6 +866,8 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data,
601866
struct list_head *sublist)
602867
{
603868
u8 hbytes[sizeof(struct ipv6hdr)];
869+
struct iptfs_skb_frag_walk _fragwalk;
870+
struct iptfs_skb_frag_walk *fragwalk = NULL;
604871
struct sk_buff *defer, *first_skb, *next, *skb;
605872
const unsigned char *old_mac;
606873
struct xfrm_iptfs_data *xtfs;
@@ -694,6 +961,7 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data,
694961
} else {
695962
first_skb = skb;
696963
first_iplen = iplen;
964+
fragwalk = NULL;
697965

698966
/* We are going to skip over `data` bytes to reach the
699967
* start of the IP header of `iphlen` len for `iplen`
@@ -745,6 +1013,13 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data,
7451013
/* all pointers could be changed now reset walk */
7461014
skb_abort_seq_read(skbseq);
7471015
skb_prepare_seq_read(skb, data, tail, skbseq);
1016+
} else if (skb->head_frag &&
1017+
/* We have the IP header right now */
1018+
remaining >= iphlen) {
1019+
fragwalk = &_fragwalk;
1020+
iptfs_skb_prepare_frag_walk(skb, data, fragwalk);
1021+
defer = skb;
1022+
skb = NULL;
7481023
} else {
7491024
/* We couldn't reuse the input skb so allocate a
7501025
* new one.
@@ -760,8 +1035,17 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data,
7601035

7611036
capturelen = min(iplen, remaining);
7621037
if (!skb) {
763-
skb = iptfs_pskb_extract_seq(iplen, skbseq, data,
764-
capturelen);
1038+
if (!fragwalk ||
1039+
/* Large enough to be worth sharing */
1040+
iplen < IPTFS_PKT_SHARE_MIN ||
1041+
/* Have IP header + some data to share. */
1042+
capturelen <= iphlen ||
1043+
/* Try creating skb and adding frags */
1044+
!(skb = iptfs_pskb_add_frags(first_skb, fragwalk,
1045+
data, capturelen,
1046+
skbseq, iphlen))) {
1047+
skb = iptfs_pskb_extract_seq(iplen, skbseq, data, capturelen);
1048+
}
7651049
if (!skb) {
7661050
/* skip to next packet or done */
7671051
data += capturelen;

0 commit comments

Comments
 (0)