Commit 6bc7e4e

Author: Paolo Abeni
Revert "net: skb: introduce and use a single page frag cache"
After the previous commit, it is finally safe to revert commit dbae2b0 ("net: skb: introduce and use a single page frag cache"): do it here.

The intended goal of that change was to counter a performance regression introduced by commit 3226b15 ("net: avoid 32 x truesize under-estimation for tiny skbs").

Unfortunately, the blamed commit introduced another regression for the virtio_net driver. That driver calls napi_alloc_skb() with a tiny size, so that the whole head frag could fit a 512-byte block. The single page frag cache uses a 1K fragment for such allocations, and the additional overhead, under a flood of small UDP packets, makes the page allocator a bottleneck.

Thanks to commit bf9f1ba ("net: add dedicated kmem_cache for typical/small skb->head"), this revert does not re-introduce the original regression. In fact, in the relevant test on top of this revert, I measure a small but noticeable positive delta, just above noise level.

The revert itself required some additional mangling due to recent updates in the affected code.

Suggested-by: Eric Dumazet <[email protected]>
Fixes: dbae2b0 ("net: skb: introduce and use a single page frag cache")
Reviewed-by: Eric Dumazet <[email protected]>
Signed-off-by: Paolo Abeni <[email protected]>
1 parent 14ad6ed commit 6bc7e4e
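To make the sizing argument concrete, below is a minimal userspace sketch of the head-allocation choice napi_alloc_skb() makes once the revert is applied (see the net/core/skbuff.c hunk further down). The constants and helper names (PAGE_SZ, SMALL_HEAD_SZ, SHINFO_OVERHEAD, skb_with_overhead, head_source) are illustrative assumptions, not kernel values or APIs, and the GFP-flag fallback to kmalloc is omitted; in the kernel, the small-request path goes through __alloc_skb(), which can serve the head from the dedicated kmem_cache added by commit bf9f1ba.

#include <stdio.h>

/* Illustrative stand-ins; real values depend on config and arch. */
#define PAGE_SZ          4096  /* assumed page size */
#define SMALL_HEAD_SZ     704  /* assumed SKB_SMALL_HEAD_CACHE_SIZE */
#define SHINFO_OVERHEAD   320  /* assumed sizeof(struct skb_shared_info) */

/* models SKB_WITH_OVERHEAD(): room left once shared info is reserved */
static int skb_with_overhead(int sz)
{
        return sz - SHINFO_OVERHEAD;
}

static const char *head_source(int len)
{
        /* tiny heads: __alloc_skb(), backed by the small skb->head cache */
        if (len <= skb_with_overhead(SMALL_HEAD_SZ))
                return "__alloc_skb (small skb->head kmem_cache)";
        /* oversized heads: __alloc_skb(), backed by kmalloc */
        if (len > skb_with_overhead(PAGE_SZ))
                return "__alloc_skb (kmalloc)";
        /* everything else: the per-CPU page frag cache */
        return "page_frag_alloc";
}

int main(void)
{
        const int sizes[] = { 128, 512, 1500, 4000 };

        for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
                printf("len=%4d -> %s\n", sizes[i], head_source(sizes[i]));
        return 0;
}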

File tree

3 files changed (+22, -100)


include/linux/netdevice.h

Lines changed: 0 additions & 1 deletion
@@ -4117,7 +4117,6 @@ void netif_receive_skb_list(struct list_head *head);
 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
 void napi_gro_flush(struct napi_struct *napi, bool flush_old);
 struct sk_buff *napi_get_frags(struct napi_struct *napi);
-void napi_get_frags_check(struct napi_struct *napi);
 gro_result_t napi_gro_frags(struct napi_struct *napi);

 static inline void napi_free_frags(struct napi_struct *napi)

net/core/dev.c

Lines changed: 17 additions & 0 deletions
@@ -6991,6 +6991,23 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
         list_add_rcu(&napi->dev_list, higher); /* adds after higher */
 }

+/* Double check that napi_get_frags() allocates skbs with
+ * skb->head being backed by slab, not a page fragment.
+ * This is to make sure bug fixed in 3226b158e67c
+ * ("net: avoid 32 x truesize under-estimation for tiny skbs")
+ * does not accidentally come back.
+ */
+static void napi_get_frags_check(struct napi_struct *napi)
+{
+        struct sk_buff *skb;
+
+        local_bh_disable();
+        skb = napi_get_frags(napi);
+        WARN_ON_ONCE(skb && skb->head_frag);
+        napi_free_frags(napi);
+        local_bh_enable();
+}
+
 void netif_napi_add_weight_locked(struct net_device *dev,
                                   struct napi_struct *napi,
                                   int (*poll)(struct napi_struct *, int),

net/core/skbuff.c

Lines changed: 5 additions & 99 deletions
@@ -223,67 +223,9 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
 #define NAPI_SKB_CACHE_BULK     16
 #define NAPI_SKB_CACHE_HALF     (NAPI_SKB_CACHE_SIZE / 2)

-#if PAGE_SIZE == SZ_4K
-
-#define NAPI_HAS_SMALL_PAGE_FRAG        1
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc)  ((nc).pfmemalloc)
-
-/* specialized page frag allocator using a single order 0 page
- * and slicing it into 1K sized fragment. Constrained to systems
- * with a very limited amount of 1K fragments fitting a single
- * page - to avoid excessive truesize underestimation
- */
-
-struct page_frag_1k {
-        void *va;
-        u16 offset;
-        bool pfmemalloc;
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp)
-{
-        struct page *page;
-        int offset;
-
-        offset = nc->offset - SZ_1K;
-        if (likely(offset >= 0))
-                goto use_frag;
-
-        page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
-        if (!page)
-                return NULL;
-
-        nc->va = page_address(page);
-        nc->pfmemalloc = page_is_pfmemalloc(page);
-        offset = PAGE_SIZE - SZ_1K;
-        page_ref_add(page, offset / SZ_1K);
-
-use_frag:
-        nc->offset = offset;
-        return nc->va + offset;
-}
-#else
-
-/* the small page is actually unused in this build; add dummy helpers
- * to please the compiler and avoid later preprocessor's conditionals
- */
-#define NAPI_HAS_SMALL_PAGE_FRAG        0
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc)  false
-
-struct page_frag_1k {
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask)
-{
-        return NULL;
-}
-
-#endif
-
 struct napi_alloc_cache {
         local_lock_t bh_lock;
         struct page_frag_cache page;
-        struct page_frag_1k page_small;
         unsigned int skb_count;
         void *skb_cache[NAPI_SKB_CACHE_SIZE];
 };
@@ -293,23 +235,6 @@ static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = {
         .bh_lock = INIT_LOCAL_LOCK(bh_lock),
 };

-/* Double check that napi_get_frags() allocates skbs with
- * skb->head being backed by slab, not a page fragment.
- * This is to make sure bug fixed in 3226b158e67c
- * ("net: avoid 32 x truesize under-estimation for tiny skbs")
- * does not accidentally come back.
- */
-void napi_get_frags_check(struct napi_struct *napi)
-{
-        struct sk_buff *skb;
-
-        local_bh_disable();
-        skb = napi_get_frags(napi);
-        WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag);
-        napi_free_frags(napi);
-        local_bh_enable();
-}
-
 void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
 {
         struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
@@ -816,11 +741,8 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)

         /* If requested length is either too small or too big,
          * we use kmalloc() for skb->head allocation.
-         * When the small frag allocator is available, prefer it over kmalloc
-         * for small fragments
          */
-        if ((!NAPI_HAS_SMALL_PAGE_FRAG &&
-             len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)) ||
+        if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) ||
             len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
             (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
                 skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
@@ -830,32 +752,16 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
                 goto skb_success;
         }

+        len = SKB_HEAD_ALIGN(len);
+
         if (sk_memalloc_socks())
                 gfp_mask |= __GFP_MEMALLOC;

         local_lock_nested_bh(&napi_alloc_cache.bh_lock);
         nc = this_cpu_ptr(&napi_alloc_cache);
-        if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) {
-                /* we are artificially inflating the allocation size, but
-                 * that is not as bad as it may look like, as:
-                 * - 'len' less than GRO_MAX_HEAD makes little sense
-                 * - On most systems, larger 'len' values lead to fragment
-                 *   size above 512 bytes
-                 * - kmalloc would use the kmalloc-1k slab for such values
-                 * - Builds with smaller GRO_MAX_HEAD will very likely do
-                 *   little networking, as that implies no WiFi and no
-                 *   tunnels support, and 32 bits arches.
-                 */
-                len = SZ_1K;

-                data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
-                pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
-        } else {
-                len = SKB_HEAD_ALIGN(len);
-
-                data = page_frag_alloc(&nc->page, len, gfp_mask);
-                pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
-        }
+        data = page_frag_alloc(&nc->page, len, gfp_mask);
+        pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
         local_unlock_nested_bh(&napi_alloc_cache.bh_lock);

         if (unlikely(!data))