
Commit 3a3af3a

Merge branch 'skb-coalescing-page_pool'
Liang Chen says:

====================
skbuff: Optimize SKB coalescing for page pool

The combination of the following conditions was excluded from skb
coalescing:

  from->pp_recycle = 1
  from->cloned = 1
  to->pp_recycle = 1

With page pool in use, this combination can be quite common (e.g.
NetworkManager may lead to an additional packet_type being registered,
thus the cloning). In scenarios with a higher number of small packets,
it can significantly affect the success rate of coalescing. This
patchset aims to optimize this scenario and enable coalescing for this
particular combination. That also involves supporting multiple users
referencing the same fragment of a pp page, to accommodate the need to
increment the "from" SKB page's pp page reference count.

Changes from v10:
- re-number patches to 1/3, 2/3, 3/3

Changes from v9:
- patch 1 was already applied
- improve description for patch 2
- make sure skb_pp_frag_ref only works for pp-aware skbs
====================

Signed-off-by: David S. Miller <[email protected]>
2 parents 66fe896 + f7dc324 commit 3a3af3a

3 files changed: 52 additions & 14 deletions

include/net/page_pool/helpers.h

Lines changed: 5 additions & 0 deletions

@@ -277,6 +277,11 @@ static inline long page_pool_unref_page(struct page *page, long nr)
 	return ret;
 }
 
+static inline void page_pool_ref_page(struct page *page)
+{
+	atomic_long_inc(&page->pp_ref_count);
+}
+
 static inline bool page_pool_is_last_ref(struct page *page)
 {
 	/* If page_pool_unref_page() returns 0, we were the last user */
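
For context: pp_ref_count counts fragment users of a page, and the new page_pool_ref_page() is the increment side that coalescing needs, pairing with the existing page_pool_unref_page(). Below is a userspace model of that contract; fake_page, ref_page and unref_page are stand-ins invented for illustration, and the real page_pool_unref_page() additionally handles the allocation-time bias.

#include <assert.h>
#include <stdatomic.h>

/* Stand-in for the pp_ref_count field of struct page. */
struct fake_page {
	atomic_long pp_ref_count;
};

/* Mirrors page_pool_ref_page(): one more user of this fragment. */
static void ref_page(struct fake_page *p)
{
	atomic_fetch_add(&p->pp_ref_count, 1);
}

/* Models the page_pool_unref_page() contract: returns the remaining
 * count; 0 means we were the last user and the page can be recycled.
 */
static long unref_page(struct fake_page *p, long nr)
{
	return atomic_fetch_sub(&p->pp_ref_count, nr) - nr;
}

int main(void)
{
	struct fake_page pg = { .pp_ref_count = 2 }; /* two fragment users */

	ref_page(&pg);                   /* a coalesced skb pins it as well */
	assert(unref_page(&pg, 1) == 2);
	assert(unref_page(&pg, 1) == 1);
	assert(unref_page(&pg, 1) == 0); /* last user: recycle is safe */
	return 0;
}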

net/core/page_pool.c

Lines changed: 1 addition & 1 deletion

@@ -28,7 +28,7 @@
 #define DEFER_TIME (msecs_to_jiffies(1000))
 #define DEFER_WARN_INTERVAL (60 * HZ)
 
-#define BIAS_MAX	LONG_MAX
+#define BIAS_MAX	(LONG_MAX >> 1)
 
 #ifdef CONFIG_PAGE_POOL_STATS
 /* alloc_stat_inc is intended to be used in softirq context */
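
Why BIAS_MAX shrinks: as I read the change, the fragment allocator seeds pp_ref_count with BIAS_MAX up front, and until now nothing ever incremented the counter past that seed. With coalescing now adding references via page_pool_ref_page(), halving the cap leaves the upper half of the signed long range as headroom so increments cannot overflow. A runnable check of that arithmetic (illustrative numbers only):

#include <assert.h>
#include <limits.h>

int main(void)
{
	long bias_max   = LONG_MAX >> 1; /* new cap on the initial bias */
	long extra_refs = LONG_MAX >> 1; /* worst-case later increments */

	/* bias + increments stays representable in a signed long;
	 * with the old cap of LONG_MAX, even a single increment
	 * would already have overflowed.
	 */
	assert(bias_max <= LONG_MAX - extra_refs);
	return 0;
}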

net/core/skbuff.c

Lines changed: 46 additions & 13 deletions
@@ -890,6 +890,11 @@ static void skb_clone_fraglist(struct sk_buff *skb)
 		skb_get(list);
 }
 
+static bool is_pp_page(struct page *page)
+{
+	return (page->pp_magic & ~0x3UL) == PP_SIGNATURE;
+}
+
 #if IS_ENABLED(CONFIG_PAGE_POOL)
 bool napi_pp_put_page(struct page *page, bool napi_safe)
 {
@@ -905,7 +910,7 @@ bool napi_pp_put_page(struct page *page, bool napi_safe)
 	 * and page_is_pfmemalloc() is checked in __page_pool_put_page()
 	 * to avoid recycling the pfmemalloc page.
 	 */
-	if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
+	if (unlikely(!is_pp_page(page)))
 		return false;
 
 	pp = page->pp;
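
The is_pp_page() predicate introduced above centralizes the signature test this hunk used to perform inline. A userspace model of the mask logic; FAKE_PP_SIGNATURE is a placeholder value, not the kernel's PP_SIGNATURE:

#include <assert.h>
#include <stdbool.h>

#define FAKE_PP_SIGNATURE 0x40UL /* placeholder, for illustration only */

/* Mirrors is_pp_page(): the two low bits of pp_magic are masked off
 * before comparing against the signature.
 */
static bool is_pp(unsigned long pp_magic)
{
	return (pp_magic & ~0x3UL) == FAKE_PP_SIGNATURE;
}

int main(void)
{
	assert(is_pp(FAKE_PP_SIGNATURE));       /* stamped pp page */
	assert(is_pp(FAKE_PP_SIGNATURE | 0x3)); /* low bits are ignored */
	assert(!is_pp(0x0));                    /* ordinary page */
	return 0;
}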
@@ -942,6 +947,37 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
 	return napi_pp_put_page(virt_to_page(data), napi_safe);
 }
 
+/**
+ * skb_pp_frag_ref() - Increase fragment references of a page pool aware skb
+ * @skb: page pool aware skb
+ *
+ * Increase the fragment reference count (pp_ref_count) of a skb. This is
+ * intended to gain fragment references only for page pool aware skbs,
+ * i.e. when skb->pp_recycle is true, and not for fragments in a
+ * non-pp-recycling skb. It has a fallback to increase references on normal
+ * pages, as page pool aware skbs may also have normal page fragments.
+ */
+static int skb_pp_frag_ref(struct sk_buff *skb)
+{
+	struct skb_shared_info *shinfo;
+	struct page *head_page;
+	int i;
+
+	if (!skb->pp_recycle)
+		return -EINVAL;
+
+	shinfo = skb_shinfo(skb);
+
+	for (i = 0; i < shinfo->nr_frags; i++) {
+		head_page = compound_head(skb_frag_page(&shinfo->frags[i]));
+		if (likely(is_pp_page(head_page)))
+			page_pool_ref_page(head_page);
+		else
+			page_ref_inc(head_page);
+	}
+	return 0;
+}
+
 static void skb_kfree_head(void *head, unsigned int end_offset)
 {
 	if (end_offset == SKB_SMALL_HEAD_HEADROOM)
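
skb_pp_frag_ref()'s contract in miniature: a non-zero return (-EINVAL for non-pp skbs) tells the caller to fall back to full page references, which is exactly what the skb_try_coalesce() hunk further down does. A userspace model of the per-fragment branch; fake_page, its fields and frag_ref are stand-ins for illustration:

#include <assert.h>
#include <errno.h>
#include <stdbool.h>

struct fake_page {
	bool is_pp;         /* stand-in for the pp_magic signature test */
	long pp_ref_count;  /* page pool fragment references */
	long refcount;      /* ordinary page references */
};

/* Models skb_pp_frag_ref() for a single fragment: pp pages get a
 * pp_ref_count increment, normal pages a plain refcount increment,
 * and non-pp skbs are rejected so the caller can fall back.
 */
static int frag_ref(bool skb_pp_recycle, struct fake_page *frag)
{
	if (!skb_pp_recycle)
		return -EINVAL; /* caller uses __skb_frag_ref() instead */
	if (frag->is_pp)
		frag->pp_ref_count++;
	else
		frag->refcount++;
	return 0;
}

int main(void)
{
	struct fake_page pp_frag = { .is_pp = true,  .pp_ref_count = 1 };
	struct fake_page normal  = { .is_pp = false, .refcount = 1 };

	assert(frag_ref(true, &pp_frag) == 0 && pp_frag.pp_ref_count == 2);
	assert(frag_ref(true, &normal)  == 0 && normal.refcount == 2);
	assert(frag_ref(false, &pp_frag) == -EINVAL); /* non-pp skb */
	return 0;
}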
@@ -5765,17 +5801,12 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 		return false;
 
 	/* In general, avoid mixing page_pool and non-page_pool allocated
-	 * pages within the same SKB. Additionally avoid dealing with clones
-	 * with page_pool pages, in case the SKB is using page_pool fragment
-	 * references (page_pool_alloc_frag()). Since we only take full page
-	 * references for cloned SKBs at the moment that would result in
-	 * inconsistent reference counts.
-	 * In theory we could take full references if @from is cloned and
-	 * !@to->pp_recycle but its tricky (due to potential race with
-	 * the clone disappearing) and rare, so not worth dealing with.
+	 * pages within the same SKB. In theory we could take full
+	 * references if @from is cloned and !@to->pp_recycle but its
+	 * tricky (due to potential race with the clone disappearing) and
+	 * rare, so not worth dealing with.
 	 */
-	if (to->pp_recycle != from->pp_recycle ||
-	    (from->pp_recycle && skb_cloned(from)))
+	if (to->pp_recycle != from->pp_recycle)
 		return false;
 
 	if (len <= skb_tailroom(to)) {
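
For a side-by-side of what the relaxed check drops (both versions taken from the hunk above): cloned pp skbs are no longer refused, because fragment references via skb_pp_frag_ref() keep the counts consistent.

/* Before: cloned page-pool skbs were refused outright. */
if (to->pp_recycle != from->pp_recycle ||
    (from->pp_recycle && skb_cloned(from)))
	return false;

/* After: only mixing pp and non-pp skbs is refused; cloned pp skbs
 * are handled by taking fragment references (skb_pp_frag_ref()).
 */
if (to->pp_recycle != from->pp_recycle)
	return false;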
@@ -5832,8 +5863,10 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 	/* if the skb is not cloned this does nothing
 	 * since we set nr_frags to 0.
 	 */
-	for (i = 0; i < from_shinfo->nr_frags; i++)
-		__skb_frag_ref(&from_shinfo->frags[i]);
+	if (skb_pp_frag_ref(from)) {
+		for (i = 0; i < from_shinfo->nr_frags; i++)
+			__skb_frag_ref(&from_shinfo->frags[i]);
+	}
 
 	to->truesize += delta;
 	to->len += len;
