Skip to content

Commit 784d48d

Browse files
q2ven authored and Kernel Patches Daemon committed
net-memcg: Allow decoupling memcg from global protocol memory accounting.
Some protocols (e.g., TCP, UDP) implement memory accounting for socket buffers and charge memory to per-protocol global counters pointed to by sk->sk_proto->memory_allocated. If a socket has sk->sk_memcg, this memory is also charged to memcg as "sock" in memory.stat. We do not need to pay costs for two orthogonal memory accounting mechanisms. A microbenchmark result is in the subsequent bpf patch. Let's decouple sockets under memcg from the global per-protocol memory accounting if mem_cgroup_sk_exclusive() returns true. Note that this does NOT disable memcg, but rather the per-protocol one. mem_cgroup_sk_exclusive() starts to return true in the following patches, and then, the per-protocol memory accounting will be skipped. In __inet_accept(), we need to reclaim counts that are already charged for child sockets because we do not allocate sk->sk_memcg until accept(). trace_sock_exceed_buf_limit() will always show 0 as accounted for the memcg-exclusive sockets, but this can be obtained in memory.stat. Signed-off-by: Kuniyuki Iwashima <[email protected]> Nacked-by: Johannes Weiner <[email protected]> Reviewed-by: Shakeel Butt <[email protected]>
1 parent 1b73b76 commit 784d48d

File tree: 10 files changed, +100 −32 lines changed

include/net/proto_memory.h

Lines changed: 12 additions & 3 deletions
@@ -31,13 +31,22 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
 	if (!sk->sk_prot->memory_pressure)
 		return false;

-	if (mem_cgroup_sk_enabled(sk) &&
-	    mem_cgroup_sk_under_memory_pressure(sk))
-		return true;
+	if (mem_cgroup_sk_enabled(sk)) {
+		if (mem_cgroup_sk_under_memory_pressure(sk))
+			return true;
+
+		if (mem_cgroup_sk_exclusive(sk))
+			return false;
+	}

 	return !!READ_ONCE(*sk->sk_prot->memory_pressure);
 }

+static inline bool sk_should_enter_memory_pressure(struct sock *sk)
+{
+	return !mem_cgroup_sk_enabled(sk) || !mem_cgroup_sk_exclusive(sk);
+}
+
 static inline long
 proto_memory_allocated(const struct proto *prot)
 {

include/net/sock.h

Lines changed: 10 additions & 0 deletions
@@ -2625,6 +2625,11 @@ static inline bool mem_cgroup_sk_enabled(const struct sock *sk)
 	return mem_cgroup_sockets_enabled && mem_cgroup_from_sk(sk);
 }

+static inline bool mem_cgroup_sk_exclusive(const struct sock *sk)
+{
+	return false;
+}
+
 static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_sk(sk);
@@ -2652,6 +2657,11 @@ static inline bool mem_cgroup_sk_enabled(const struct sock *sk)
 	return false;
 }

+static inline bool mem_cgroup_sk_exclusive(const struct sock *sk)
+{
+	return false;
+}
+
 static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk)
 {
 	return false;

include/net/tcp.h

Lines changed: 7 additions & 3 deletions
@@ -299,9 +299,13 @@ extern unsigned long tcp_memory_pressure;
 /* optimized version of sk_under_memory_pressure() for TCP sockets */
 static inline bool tcp_under_memory_pressure(const struct sock *sk)
 {
-	if (mem_cgroup_sk_enabled(sk) &&
-	    mem_cgroup_sk_under_memory_pressure(sk))
-		return true;
+	if (mem_cgroup_sk_enabled(sk)) {
+		if (mem_cgroup_sk_under_memory_pressure(sk))
+			return true;
+
+		if (mem_cgroup_sk_exclusive(sk))
+			return false;
+	}

 	return READ_ONCE(tcp_memory_pressure);
 }

net/core/sock.c

Lines changed: 44 additions & 20 deletions
@@ -1046,17 +1046,21 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
 	if (!charged)
 		return -ENOMEM;

-	/* pre-charge to forward_alloc */
-	sk_memory_allocated_add(sk, pages);
-	allocated = sk_memory_allocated(sk);
-	/* If the system goes into memory pressure with this
-	 * precharge, give up and return error.
-	 */
-	if (allocated > sk_prot_mem_limits(sk, 1)) {
-		sk_memory_allocated_sub(sk, pages);
-		mem_cgroup_sk_uncharge(sk, pages);
-		return -ENOMEM;
+	if (!mem_cgroup_sk_exclusive(sk)) {
+		/* pre-charge to forward_alloc */
+		sk_memory_allocated_add(sk, pages);
+		allocated = sk_memory_allocated(sk);
+
+		/* If the system goes into memory pressure with this
+		 * precharge, give up and return error.
+		 */
+		if (allocated > sk_prot_mem_limits(sk, 1)) {
+			sk_memory_allocated_sub(sk, pages);
+			mem_cgroup_sk_uncharge(sk, pages);
+			return -ENOMEM;
+		}
 	}
+
 	sk_forward_alloc_add(sk, pages << PAGE_SHIFT);

 	WRITE_ONCE(sk->sk_reserved_mem,
@@ -3136,8 +3140,11 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
 	if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
 		return true;

-	sk_enter_memory_pressure(sk);
+	if (sk_should_enter_memory_pressure(sk))
+		sk_enter_memory_pressure(sk);
+
 	sk_stream_moderate_sndbuf(sk);
+
 	return false;
 }
 EXPORT_SYMBOL(sk_page_frag_refill);
@@ -3254,18 +3261,30 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
 {
 	bool memcg_enabled = false, charged = false;
 	struct proto *prot = sk->sk_prot;
-	long allocated;
-
-	sk_memory_allocated_add(sk, amt);
-	allocated = sk_memory_allocated(sk);
+	long allocated = 0;

 	if (mem_cgroup_sk_enabled(sk)) {
+		bool exclusive = mem_cgroup_sk_exclusive(sk);
+
 		memcg_enabled = true;
 		charged = mem_cgroup_sk_charge(sk, amt, gfp_memcg_charge());
-		if (!charged)
+
+		if (exclusive && charged)
+			return 1;
+
+		if (!charged) {
+			if (!exclusive) {
+				sk_memory_allocated_add(sk, amt);
+				allocated = sk_memory_allocated(sk);
+			}
+
 			goto suppress_allocation;
+		}
 	}

+	sk_memory_allocated_add(sk, amt);
+	allocated = sk_memory_allocated(sk);
+
 	/* Under limit. */
 	if (allocated <= sk_prot_mem_limits(sk, 0)) {
 		sk_leave_memory_pressure(sk);
@@ -3344,7 +3363,8 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)

 	trace_sock_exceed_buf_limit(sk, prot, allocated, kind);

-	sk_memory_allocated_sub(sk, amt);
+	if (allocated)
+		sk_memory_allocated_sub(sk, amt);

 	if (charged)
 		mem_cgroup_sk_uncharge(sk, amt);
@@ -3383,11 +3403,15 @@ EXPORT_SYMBOL(__sk_mem_schedule);
  */
 void __sk_mem_reduce_allocated(struct sock *sk, int amount)
 {
-	sk_memory_allocated_sub(sk, amount);
-
-	if (mem_cgroup_sk_enabled(sk))
+	if (mem_cgroup_sk_enabled(sk)) {
 		mem_cgroup_sk_uncharge(sk, amount);

+		if (mem_cgroup_sk_exclusive(sk))
+			return;
+	}
+
+	sk_memory_allocated_sub(sk, amount);
+
 	if (sk_under_global_memory_pressure(sk) &&
 	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
 		sk_leave_memory_pressure(sk);

net/ipv4/af_inet.c

Lines changed: 11 additions & 1 deletion
@@ -95,6 +95,7 @@
 #include <net/checksum.h>
 #include <net/ip.h>
 #include <net/protocol.h>
+#include <net/proto_memory.h>
 #include <net/arp.h>
 #include <net/route.h>
 #include <net/ip_fib.h>
@@ -770,8 +771,17 @@ void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *new
 	 */
 	amt = sk_mem_pages(newsk->sk_forward_alloc +
 			   atomic_read(&newsk->sk_rmem_alloc));
-	if (amt)
+	if (amt) {
+		/* This amt is already charged globally to
+		 * sk_prot->memory_allocated due to lack of
+		 * sk_memcg until accept(), thus we need to
+		 * reclaim it here if newsk is isolated.
+		 */
+		if (mem_cgroup_sk_exclusive(newsk))
+			sk_memory_allocated_sub(newsk, amt);
+
 		mem_cgroup_sk_charge(newsk, amt, gfp);
+	}
 }

 kmem_cache_charge(newsk, gfp);

net/ipv4/inet_connection_sock.c

Lines changed: 1 addition & 0 deletions
@@ -22,6 +22,7 @@
 #include <net/tcp.h>
 #include <net/sock_reuseport.h>
 #include <net/addrconf.h>
+#include <net/proto_memory.h>

 #if IS_ENABLED(CONFIG_IPV6)
 /* match_sk*_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses

net/ipv4/tcp.c

Lines changed: 2 additions & 1 deletion
@@ -928,7 +928,8 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp,
 		}
 		__kfree_skb(skb);
 	} else {
-		sk->sk_prot->enter_memory_pressure(sk);
+		if (sk_should_enter_memory_pressure(sk))
+			tcp_enter_memory_pressure(sk);
 		sk_stream_moderate_sndbuf(sk);
 	}
 	return NULL;

net/ipv4/tcp_output.c

Lines changed: 8 additions & 2 deletions
@@ -3732,12 +3732,18 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
 	delta = size - sk->sk_forward_alloc;
 	if (delta <= 0)
 		return;
+
 	amt = sk_mem_pages(delta);
 	sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
-	sk_memory_allocated_add(sk, amt);

-	if (mem_cgroup_sk_enabled(sk))
+	if (mem_cgroup_sk_enabled(sk)) {
 		mem_cgroup_sk_charge(sk, amt, gfp_memcg_charge() | __GFP_NOFAIL);
+
+		if (mem_cgroup_sk_exclusive(sk))
+			return;
+	}
+
+	sk_memory_allocated_add(sk, amt);
 }

 /* Send a FIN. The caller locks the socket for us.

net/mptcp/protocol.c

Lines changed: 2 additions & 1 deletion
@@ -17,6 +17,7 @@
 #include <net/inet_common.h>
 #include <net/inet_hashtables.h>
 #include <net/protocol.h>
+#include <net/proto_memory.h>
 #include <net/tcp_states.h>
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 #include <net/transp_v6.h>
@@ -1030,7 +1031,7 @@ static void mptcp_enter_memory_pressure(struct sock *sk)
 	mptcp_for_each_subflow(msk, subflow) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

-		if (first)
+		if (first && sk_should_enter_memory_pressure(ssk))
 			tcp_enter_memory_pressure(ssk);
 		sk_stream_moderate_sndbuf(ssk);

net/tls/tls_device.c

Lines changed: 3 additions & 1 deletion
@@ -35,6 +35,7 @@
 #include <linux/netdevice.h>
 #include <net/dst.h>
 #include <net/inet_connection_sock.h>
+#include <net/proto_memory.h>
 #include <net/tcp.h>
 #include <net/tls.h>
 #include <linux/skbuff_ref.h>
@@ -373,7 +374,8 @@ static int tls_do_allocation(struct sock *sk,
 	if (!offload_ctx->open_record) {
 		if (unlikely(!skb_page_frag_refill(prepend_size, pfrag,
 						   sk->sk_allocation))) {
-			READ_ONCE(sk->sk_prot)->enter_memory_pressure(sk);
+			if (sk_should_enter_memory_pressure(sk))
+				READ_ONCE(sk->sk_prot)->enter_memory_pressure(sk);
 			sk_stream_moderate_sndbuf(sk);
 			return -ENOMEM;
 		}

0 commit comments

Comments
 (0)