Skip to content

Commit 9ed816b

Browse files
committed
Merge branch 'net-optmem_max-changes'
Eric Dumazet says:

====================
net: optmem_max changes

optmem_max default value is too small for tx zerocopy workloads.

First patch increases default from 20KB to 128 KB,
which is the value we have used for seven years.

Second patch makes optmem_max sysctl per netns.

Last patch tweaks two tests accordingly.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents e16064c + 18872ba commit 9ed816b

File tree

12 files changed

+41
-36
lines changed

12 files changed

+41
-36
lines changed

Documentation/admin-guide/sysctl/net.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -345,7 +345,10 @@ optmem_max
345345
----------
346346

347347
Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence
348-
of struct cmsghdr structures with appended data.
348+
of struct cmsghdr structures with appended data. TCP tx zerocopy also uses
349+
optmem_max as a limit for its internal structures.
350+
351+
Default : 128 KB
349352

350353
fb_tunnels_only_for_init_net
351354
----------------------------

include/net/netns/core.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ struct netns_core {
1313
struct ctl_table_header *sysctl_hdr;
1414

1515
int sysctl_somaxconn;
16+
int sysctl_optmem_max;
1617
u8 sysctl_txrehash;
1718

1819
#ifdef CONFIG_PROC_FS

include/net/sock.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2920,7 +2920,6 @@ extern __u32 sysctl_wmem_max;
29202920
extern __u32 sysctl_rmem_max;
29212921

29222922
extern int sysctl_tstamp_allow_data;
2923-
extern int sysctl_optmem_max;
29242923

29252924
extern __u32 sysctl_wmem_default;
29262925
extern __u32 sysctl_rmem_default;

net/core/bpf_sk_storage.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,9 +275,10 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
275275
static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
276276
void *owner, u32 size)
277277
{
278-
int optmem_max = READ_ONCE(sysctl_optmem_max);
279278
struct sock *sk = (struct sock *)owner;
279+
int optmem_max;
280280

281+
optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
281282
/* same check as in sock_kmalloc() */
282283
if (size <= optmem_max &&
283284
atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {

net/core/filter.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1219,8 +1219,8 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
12191219
*/
12201220
static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
12211221
{
1222+
int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
12221223
u32 filter_size = bpf_prog_size(fp->prog->len);
1223-
int optmem_max = READ_ONCE(sysctl_optmem_max);
12241224

12251225
/* same check as in sock_kmalloc() */
12261226
if (filter_size <= optmem_max &&
@@ -1550,12 +1550,13 @@ EXPORT_SYMBOL_GPL(sk_attach_filter);
15501550
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
15511551
{
15521552
struct bpf_prog *prog = __get_filter(fprog, sk);
1553-
int err;
1553+
int err, optmem_max;
15541554

15551555
if (IS_ERR(prog))
15561556
return PTR_ERR(prog);
15571557

1558-
if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max))
1558+
optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
1559+
if (bpf_prog_size(prog->len) > optmem_max)
15591560
err = -ENOMEM;
15601561
else
15611562
err = reuseport_attach_prog(sk, prog);
@@ -1594,7 +1595,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
15941595
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
15951596
{
15961597
struct bpf_prog *prog;
1597-
int err;
1598+
int err, optmem_max;
15981599

15991600
if (sock_flag(sk, SOCK_FILTER_LOCKED))
16001601
return -EPERM;
@@ -1622,7 +1623,8 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
16221623
}
16231624
} else {
16241625
/* BPF_PROG_TYPE_SOCKET_FILTER */
1625-
if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) {
1626+
optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
1627+
if (bpf_prog_size(prog->len) > optmem_max) {
16261628
err = -ENOMEM;
16271629
goto err_prog_put;
16281630
}

net/core/net_namespace.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,10 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
372372
static int __net_init net_defaults_init_net(struct net *net)
373373
{
374374
net->core.sysctl_somaxconn = SOMAXCONN;
375+
/* Limits per socket sk_omem_alloc usage.
376+
* TCP zerocopy regular usage needs 128 KB.
377+
*/
378+
net->core.sysctl_optmem_max = 128 * 1024;
375379
net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
376380

377381
return 0;

net/core/sock.c

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -283,10 +283,6 @@ EXPORT_SYMBOL(sysctl_rmem_max);
283283
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
284284
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
285285

286-
/* Maximal space eaten by iovec or ancillary data plus some space */
287-
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
288-
EXPORT_SYMBOL(sysctl_optmem_max);
289-
290286
int sysctl_tstamp_allow_data __read_mostly = 1;
291287

292288
DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
@@ -2651,7 +2647,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
26512647

26522648
/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
26532649
if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
2654-
READ_ONCE(sysctl_optmem_max))
2650+
READ_ONCE(sock_net(sk)->core.sysctl_optmem_max))
26552651
return NULL;
26562652

26572653
skb = alloc_skb(size, priority);
@@ -2669,7 +2665,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
26692665
*/
26702666
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
26712667
{
2672-
int optmem_max = READ_ONCE(sysctl_optmem_max);
2668+
int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
26732669

26742670
if ((unsigned int)size <= optmem_max &&
26752671
atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {

net/core/sysctl_net_core.c

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -508,13 +508,6 @@ static struct ctl_table net_core_table[] = {
508508
.mode = 0644,
509509
.proc_handler = proc_dointvec,
510510
},
511-
{
512-
.procname = "optmem_max",
513-
.data = &sysctl_optmem_max,
514-
.maxlen = sizeof(int),
515-
.mode = 0644,
516-
.proc_handler = proc_dointvec
517-
},
518511
{
519512
.procname = "tstamp_allow_data",
520513
.data = &sysctl_tstamp_allow_data,
@@ -673,6 +666,14 @@ static struct ctl_table netns_core_table[] = {
673666
.extra1 = SYSCTL_ZERO,
674667
.proc_handler = proc_dointvec_minmax
675668
},
669+
{
670+
.procname = "optmem_max",
671+
.data = &init_net.core.sysctl_optmem_max,
672+
.maxlen = sizeof(int),
673+
.mode = 0644,
674+
.extra1 = SYSCTL_ZERO,
675+
.proc_handler = proc_dointvec_minmax
676+
},
676677
{
677678
.procname = "txrehash",
678679
.data = &init_net.core.sysctl_txrehash,

net/ipv4/ip_sockglue.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -775,7 +775,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
775775

776776
if (optlen < GROUP_FILTER_SIZE(0))
777777
return -EINVAL;
778-
if (optlen > READ_ONCE(sysctl_optmem_max))
778+
if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max))
779779
return -ENOBUFS;
780780

781781
gsf = memdup_sockptr(optval, optlen);
@@ -811,7 +811,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
811811

812812
if (optlen < size0)
813813
return -EINVAL;
814-
if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
814+
if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max) - 4)
815815
return -ENOBUFS;
816816

817817
p = kmalloc(optlen + 4, GFP_KERNEL);
@@ -1254,7 +1254,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
12541254

12551255
if (optlen < IP_MSFILTER_SIZE(0))
12561256
goto e_inval;
1257-
if (optlen > READ_ONCE(sysctl_optmem_max)) {
1257+
if (optlen > READ_ONCE(net->core.sysctl_optmem_max)) {
12581258
err = -ENOBUFS;
12591259
break;
12601260
}

net/ipv6/ipv6_sockglue.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
210210

211211
if (optlen < GROUP_FILTER_SIZE(0))
212212
return -EINVAL;
213-
if (optlen > READ_ONCE(sysctl_optmem_max))
213+
if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max))
214214
return -ENOBUFS;
215215

216216
gsf = memdup_sockptr(optval, optlen);
@@ -244,7 +244,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
244244

245245
if (optlen < size0)
246246
return -EINVAL;
247-
if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
247+
if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max) - 4)
248248
return -ENOBUFS;
249249

250250
p = kmalloc(optlen + 4, GFP_KERNEL);

0 commit comments

Comments
 (0)