
Commit dd3188d

Author: Paolo Abeni (author and committer)
Merge branch 'net-remove-the-single-page-frag-cache-for-good'
Paolo Abeni says:

====================
net: remove the single page frag cache for good

This is another attempt at reverting commit dbae2b0 ("net: skb: introduce and use a single page frag cache"), as it causes regressions in specific use-cases.

Reverting that commit uncovers an allocation issue for builds with CONFIG_MAX_SKB_FRAGS=45, as reported by Sabrina. This series handles the latter in patch 1 and brings the revert in patch 2.

Note that there is a little chicken-and-egg problem: patch 1's changelog includes the splat that only becomes visible once the revert is applied. I think the current patch order is better for bisectability; the splat is still useful for correct attribution.
====================

Link: https://patch.msgid.link/[email protected]
Signed-off-by: Paolo Abeni <[email protected]>
2 parents 878e7b1 + 6bc7e4e commit dd3188d

File tree: 5 files changed (+30, -104 lines)

include/linux/netdevice.h

Lines changed: 0 additions & 1 deletion
@@ -4117,7 +4117,6 @@ void netif_receive_skb_list(struct list_head *head);
 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
 void napi_gro_flush(struct napi_struct *napi, bool flush_old);
 struct sk_buff *napi_get_frags(struct napi_struct *napi);
-void napi_get_frags_check(struct napi_struct *napi);
 gro_result_t napi_gro_frags(struct napi_struct *napi);
 
 static inline void napi_free_frags(struct napi_struct *napi)

include/net/gro.h

Lines changed: 3 additions & 0 deletions
@@ -11,6 +11,9 @@
 #include <net/udp.h>
 #include <net/hotdata.h>
 
+/* This should be increased if a protocol with a bigger head is added. */
+#define GRO_MAX_HEAD (MAX_HEADER + 128)
+
 struct napi_gro_cb {
 	union {
 		struct {

net/core/dev.c

Lines changed: 17 additions & 0 deletions
@@ -6991,6 +6991,23 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
 	list_add_rcu(&napi->dev_list, higher); /* adds after higher */
 }
 
+/* Double check that napi_get_frags() allocates skbs with
+ * skb->head being backed by slab, not a page fragment.
+ * This is to make sure bug fixed in 3226b158e67c
+ * ("net: avoid 32 x truesize under-estimation for tiny skbs")
+ * does not accidentally come back.
+ */
+static void napi_get_frags_check(struct napi_struct *napi)
+{
+	struct sk_buff *skb;
+
+	local_bh_disable();
+	skb = napi_get_frags(napi);
+	WARN_ON_ONCE(skb && skb->head_frag);
+	napi_free_frags(napi);
+	local_bh_enable();
+}
+
 void netif_napi_add_weight_locked(struct net_device *dev,
 				  struct napi_struct *napi,
 				  int (*poll)(struct napi_struct *, int),
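For illustration only, here is a small userspace model of the one-shot self-check pattern restored above; it is not kernel code. The fake_* names, the head_frag flag and the fprintf() warning are stand-ins for napi_get_frags(), skb->head_frag and WARN_ON_ONCE(), and the call site in main() stands in for NAPI registration: probe the allocator once at setup and complain loudly if the invariant the stack relies on has silently regressed.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct fake_skb {
	bool head_frag;			/* true if the head came from a page fragment */
};

static struct fake_skb *fake_get_frags(void)
{
	struct fake_skb *skb = calloc(1, sizeof(*skb));

	if (skb)
		skb->head_frag = false;	/* slab-backed head, as expected */
	return skb;
}

static void fake_free_frags(struct fake_skb *skb)
{
	free(skb);
}

/* Probe the allocator once and warn if the expected property is violated. */
static void fake_get_frags_check(void)
{
	struct fake_skb *skb = fake_get_frags();

	if (skb && skb->head_frag)
		fprintf(stderr, "WARNING: frag-backed head came back!\n");
	fake_free_frags(skb);
}

int main(void)
{
	fake_get_frags_check();		/* stands in for the check at NAPI registration */
	puts("self-check done");
	return 0;
}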

net/core/gro.c

Lines changed: 0 additions & 3 deletions
@@ -7,9 +7,6 @@
 
 #define MAX_GRO_SKBS 8
 
-/* This should be increased if a protocol with a bigger head is added. */
-#define GRO_MAX_HEAD (MAX_HEADER + 128)
-
 static DEFINE_SPINLOCK(offload_lock);
 
 /**

net/core/skbuff.c

Lines changed: 10 additions & 100 deletions
@@ -69,6 +69,7 @@
 #include <net/dst.h>
 #include <net/sock.h>
 #include <net/checksum.h>
+#include <net/gro.h>
 #include <net/gso.h>
 #include <net/hotdata.h>
 #include <net/ip6_checksum.h>
@@ -95,7 +96,9 @@
 static struct kmem_cache *skbuff_ext_cache __ro_after_init;
 #endif
 
-#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER)
+#define GRO_MAX_HEAD_PAD (GRO_MAX_HEAD + NET_SKB_PAD + NET_IP_ALIGN)
+#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(max(MAX_TCP_HEADER, \
+					       GRO_MAX_HEAD_PAD))
 
 /* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two.
  * This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique
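Worked example with hypothetical numbers: the constants below are placeholders, since MAX_HEADER, MAX_TCP_HEADER, NET_SKB_PAD and the skb_shared_info overhead are all configuration dependent. The sketch only shows the shape of the new computation: SKB_SMALL_HEAD_SIZE must now cover whichever is larger, a maximal TCP header or a GRO head plus its padding, so that GRO-sized heads fit the dedicated small-head kmem cache.

#include <stdio.h>

#define SMP_CACHE_BYTES   64			/* assumed cache line size */
#define ALIGN_UP(x, a)    (((x) + (a) - 1) / (a) * (a))

/* Placeholder for the skb_shared_info overhead folded in by SKB_HEAD_ALIGN(). */
#define FAKE_SHINFO_SIZE  320
#define SKB_HEAD_ALIGN(x) (ALIGN_UP((x), SMP_CACHE_BYTES) + FAKE_SHINFO_SIZE)

#define MAX_HEADER        160			/* hypothetical */
#define MAX_TCP_HEADER    320			/* hypothetical */
#define NET_SKB_PAD        64			/* hypothetical */
#define NET_IP_ALIGN        2

#define GRO_MAX_HEAD      (MAX_HEADER + 128)
#define GRO_MAX_HEAD_PAD  (GRO_MAX_HEAD + NET_SKB_PAD + NET_IP_ALIGN)

#define MAX(a, b)         ((a) > (b) ? (a) : (b))

int main(void)
{
	printf("MAX_TCP_HEADER          = %d\n", MAX_TCP_HEADER);
	printf("GRO_MAX_HEAD_PAD        = %d\n", GRO_MAX_HEAD_PAD);
	printf("old SKB_SMALL_HEAD_SIZE = %d\n", SKB_HEAD_ALIGN(MAX_TCP_HEADER));
	printf("new SKB_SMALL_HEAD_SIZE = %d\n",
	       SKB_HEAD_ALIGN(MAX(MAX_TCP_HEADER, GRO_MAX_HEAD_PAD)));
	return 0;
}

With these placeholder values GRO_MAX_HEAD_PAD exceeds MAX_TCP_HEADER, so the max() picks the GRO bound and the cache grows accordingly; with other configurations the TCP bound can still win.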
@@ -220,67 +223,9 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
 #define NAPI_SKB_CACHE_BULK	16
 #define NAPI_SKB_CACHE_HALF	(NAPI_SKB_CACHE_SIZE / 2)
 
-#if PAGE_SIZE == SZ_4K
-
-#define NAPI_HAS_SMALL_PAGE_FRAG	1
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc)	((nc).pfmemalloc)
-
-/* specialized page frag allocator using a single order 0 page
- * and slicing it into 1K sized fragment. Constrained to systems
- * with a very limited amount of 1K fragments fitting a single
- * page - to avoid excessive truesize underestimation
- */
-
-struct page_frag_1k {
-	void *va;
-	u16 offset;
-	bool pfmemalloc;
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp)
-{
-	struct page *page;
-	int offset;
-
-	offset = nc->offset - SZ_1K;
-	if (likely(offset >= 0))
-		goto use_frag;
-
-	page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
-	if (!page)
-		return NULL;
-
-	nc->va = page_address(page);
-	nc->pfmemalloc = page_is_pfmemalloc(page);
-	offset = PAGE_SIZE - SZ_1K;
-	page_ref_add(page, offset / SZ_1K);
-
-use_frag:
-	nc->offset = offset;
-	return nc->va + offset;
-}
-#else
-
-/* the small page is actually unused in this build; add dummy helpers
- * to please the compiler and avoid later preprocessor's conditionals
- */
-#define NAPI_HAS_SMALL_PAGE_FRAG	0
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc)	false
-
-struct page_frag_1k {
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask)
-{
-	return NULL;
-}
-
-#endif
-
 struct napi_alloc_cache {
 	local_lock_t bh_lock;
 	struct page_frag_cache page;
-	struct page_frag_1k page_small;
 	unsigned int skb_count;
 	void *skb_cache[NAPI_SKB_CACHE_SIZE];
 };
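An aside for readers skimming the removal above: the non-obvious part of the deleted allocator is its reference counting, where the freshly allocated page's refcount is bumped by offset / SZ_1K so that every 1K slice carved out of the page holds one reference. Below is a minimal userspace model of that bookkeeping; malloc/free and a plain int refcount stand in for struct page, alloc_pages_node() and page_ref_add(), so this is a sketch of the scheme, not kernel code.

#include <stdio.h>
#include <stdlib.h>

#define FAKE_PAGE_SIZE 4096
#define FRAG_SIZE      1024

struct fake_page {
	int refcount;			/* one reference per outstanding slice */
	char data[FAKE_PAGE_SIZE];
};

struct frag_1k_cache {
	struct fake_page *page;
	int offset;			/* next slice is carved at offset - FRAG_SIZE */
};

static void *frag_alloc_1k(struct frag_1k_cache *nc)
{
	int offset = nc->page ? nc->offset - FRAG_SIZE : -1;

	if (offset < 0) {		/* current page exhausted: start a new one */
		struct fake_page *page = malloc(sizeof(*page));

		if (!page)
			return NULL;
		page->refcount = 1;	/* reference owned by this first slice */
		offset = FAKE_PAGE_SIZE - FRAG_SIZE;
		page->refcount += offset / FRAG_SIZE;	/* pre-charge the remaining slices */
		nc->page = page;
	}
	nc->offset = offset;
	return nc->page->data + offset;
}

static void frag_put(struct fake_page *page)
{
	if (--page->refcount == 0) {	/* last outstanding slice releases the page */
		printf("last reference dropped, page freed\n");
		free(page);
	}
}

int main(void)
{
	struct frag_1k_cache nc = { 0 };
	struct fake_page *owner;
	unsigned int i, nr = FAKE_PAGE_SIZE / FRAG_SIZE;

	for (i = 0; i < nr; i++)
		frag_alloc_1k(&nc);
	owner = nc.page;
	printf("refcount after carving %u slices: %d\n", nr, owner->refcount);

	for (i = 0; i < nr; i++)
		frag_put(owner);	/* consumers dropping their slice references */
	return 0;
}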
@@ -290,23 +235,6 @@ static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = {
 	.bh_lock = INIT_LOCAL_LOCK(bh_lock),
 };
 
-/* Double check that napi_get_frags() allocates skbs with
- * skb->head being backed by slab, not a page fragment.
- * This is to make sure bug fixed in 3226b158e67c
- * ("net: avoid 32 x truesize under-estimation for tiny skbs")
- * does not accidentally come back.
- */
-void napi_get_frags_check(struct napi_struct *napi)
-{
-	struct sk_buff *skb;
-
-	local_bh_disable();
-	skb = napi_get_frags(napi);
-	WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag);
-	napi_free_frags(napi);
-	local_bh_enable();
-}
-
 void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
 {
 	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
@@ -736,7 +664,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
 	/* If requested length is either too small or too big,
 	 * we use kmalloc() for skb->head allocation.
 	 */
-	if (len <= SKB_WITH_OVERHEAD(1024) ||
+	if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) ||
 	    len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
 	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
 		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
@@ -813,10 +741,8 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
 
 	/* If requested length is either too small or too big,
 	 * we use kmalloc() for skb->head allocation.
-	 * When the small frag allocator is available, prefer it over kmalloc
-	 * for small fragments
 	 */
-	if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) ||
+	if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) ||
 	    len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
 	    (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
 		skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
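Not kernel code: a toy model of the head-allocation policy after the change above. The numeric limits are placeholders for SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) and SKB_WITH_OVERHEAD(PAGE_SIZE); the point is that, with the 1K frag cache gone, small requests (and oversized or reclaim-constrained ones) take the kmalloc-backed __alloc_skb() path, while only the middle range uses the per-CPU page fragment cache.

#include <stdbool.h>
#include <stdio.h>

#define SMALL_HEAD_LIMIT  704	/* stand-in for SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) */
#define PAGE_LIMIT       3712	/* stand-in for SKB_WITH_OVERHEAD(PAGE_SIZE) */

static const char *pick_head_allocator(unsigned int len, bool direct_reclaim)
{
	/* Small and oversized requests go to the slab path; only the
	 * middle range uses the per-CPU page fragment cache.
	 */
	if (len <= SMALL_HEAD_LIMIT || len > PAGE_LIMIT || direct_reclaim)
		return "kmalloc-backed __alloc_skb()";
	return "per-CPU page_frag_alloc()";
}

int main(void)
{
	unsigned int sizes[] = { 128, 640, 1024, 2048, 4096 };
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("len %4u -> %s\n", sizes[i],
		       pick_head_allocator(sizes[i], false));
	return 0;
}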
@@ -826,32 +752,16 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
 		goto skb_success;
 	}
 
+	len = SKB_HEAD_ALIGN(len);
+
 	if (sk_memalloc_socks())
 		gfp_mask |= __GFP_MEMALLOC;
 
 	local_lock_nested_bh(&napi_alloc_cache.bh_lock);
 	nc = this_cpu_ptr(&napi_alloc_cache);
-	if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) {
-		/* we are artificially inflating the allocation size, but
-		 * that is not as bad as it may look like, as:
-		 * - 'len' less than GRO_MAX_HEAD makes little sense
-		 * - On most systems, larger 'len' values lead to fragment
-		 *   size above 512 bytes
-		 * - kmalloc would use the kmalloc-1k slab for such values
-		 * - Builds with smaller GRO_MAX_HEAD will very likely do
-		 *   little networking, as that implies no WiFi and no
-		 *   tunnels support, and 32 bits arches.
-		 */
-		len = SZ_1K;
 
-		data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
-		pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
-	} else {
-		len = SKB_HEAD_ALIGN(len);
-
-		data = page_frag_alloc(&nc->page, len, gfp_mask);
-		pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
-	}
+	data = page_frag_alloc(&nc->page, len, gfp_mask);
+	pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
 	local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
 
 	if (unlikely(!data))
