Skip to content

Commit 9026d0a

Browse files
q2ven authored and Kernel Patches Daemon committed
net-memcg: Allow decoupling memcg from global protocol memory accounting.
Some protocols (e.g., TCP, UDP) implement memory accounting for socket buffers and charge memory to per-protocol global counters pointed to by sk->sk_proto->memory_allocated. If a socket has sk->sk_memcg, this memory is also charged to memcg as "sock" in memory.stat. We do not need to pay costs for two orthogonal memory accounting mechanisms. A microbenchmark result is in the subsequent bpf patch. Let's decouple sockets under memcg from the global per-protocol memory accounting if mem_cgroup_sk_exclusive() returns true. Note that this does NOT disable memcg, but rather the per-protocol one. mem_cgroup_sk_exclusive() starts to return true in the following patches, and then, the per-protocol memory accounting will be skipped. In __inet_accept(), we need to reclaim counts that are already charged for child sockets because we do not allocate sk->sk_memcg until accept(). trace_sock_exceed_buf_limit() will always show 0 as accounted for the memcg-exclusive sockets, but this can be obtained in memory.stat. Signed-off-by: Kuniyuki Iwashima <[email protected]> Nacked-by: Johannes Weiner <[email protected]> Reviewed-by: Shakeel Butt <[email protected]>
1 parent 23ab9dd commit 9026d0a

File tree

10 files changed

+100
-32
lines changed

10 files changed

+100
-32
lines changed

include/net/proto_memory.h

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,22 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
3131
if (!sk->sk_prot->memory_pressure)
3232
return false;
3333

34-
if (mem_cgroup_sk_enabled(sk) &&
35-
mem_cgroup_sk_under_memory_pressure(sk))
36-
return true;
34+
if (mem_cgroup_sk_enabled(sk)) {
35+
if (mem_cgroup_sk_under_memory_pressure(sk))
36+
return true;
37+
38+
if (mem_cgroup_sk_exclusive(sk))
39+
return false;
40+
}
3741

3842
return !!READ_ONCE(*sk->sk_prot->memory_pressure);
3943
}
4044

45+
static inline bool sk_should_enter_memory_pressure(struct sock *sk)
46+
{
47+
return !mem_cgroup_sk_enabled(sk) || !mem_cgroup_sk_exclusive(sk);
48+
}
49+
4150
static inline long
4251
proto_memory_allocated(const struct proto *prot)
4352
{

include/net/sock.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2607,6 +2607,11 @@ static inline bool mem_cgroup_sk_enabled(const struct sock *sk)
26072607
return mem_cgroup_sockets_enabled && mem_cgroup_from_sk(sk);
26082608
}
26092609

2610+
static inline bool mem_cgroup_sk_exclusive(const struct sock *sk)
2611+
{
2612+
return false;
2613+
}
2614+
26102615
static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk)
26112616
{
26122617
struct mem_cgroup *memcg = mem_cgroup_from_sk(sk);
@@ -2634,6 +2639,11 @@ static inline bool mem_cgroup_sk_enabled(const struct sock *sk)
26342639
return false;
26352640
}
26362641

2642+
static inline bool mem_cgroup_sk_exclusive(const struct sock *sk)
2643+
{
2644+
return false;
2645+
}
2646+
26372647
static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk)
26382648
{
26392649
return false;

include/net/tcp.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -275,9 +275,13 @@ extern unsigned long tcp_memory_pressure;
275275
/* optimized version of sk_under_memory_pressure() for TCP sockets */
276276
static inline bool tcp_under_memory_pressure(const struct sock *sk)
277277
{
278-
if (mem_cgroup_sk_enabled(sk) &&
279-
mem_cgroup_sk_under_memory_pressure(sk))
280-
return true;
278+
if (mem_cgroup_sk_enabled(sk)) {
279+
if (mem_cgroup_sk_under_memory_pressure(sk))
280+
return true;
281+
282+
if (mem_cgroup_sk_exclusive(sk))
283+
return false;
284+
}
281285

282286
return READ_ONCE(tcp_memory_pressure);
283287
}

net/core/sock.c

Lines changed: 44 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,17 +1046,21 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
10461046
if (!charged)
10471047
return -ENOMEM;
10481048

1049-
/* pre-charge to forward_alloc */
1050-
sk_memory_allocated_add(sk, pages);
1051-
allocated = sk_memory_allocated(sk);
1052-
/* If the system goes into memory pressure with this
1053-
* precharge, give up and return error.
1054-
*/
1055-
if (allocated > sk_prot_mem_limits(sk, 1)) {
1056-
sk_memory_allocated_sub(sk, pages);
1057-
mem_cgroup_sk_uncharge(sk, pages);
1058-
return -ENOMEM;
1049+
if (!mem_cgroup_sk_exclusive(sk)) {
1050+
/* pre-charge to forward_alloc */
1051+
sk_memory_allocated_add(sk, pages);
1052+
allocated = sk_memory_allocated(sk);
1053+
1054+
/* If the system goes into memory pressure with this
1055+
* precharge, give up and return error.
1056+
*/
1057+
if (allocated > sk_prot_mem_limits(sk, 1)) {
1058+
sk_memory_allocated_sub(sk, pages);
1059+
mem_cgroup_sk_uncharge(sk, pages);
1060+
return -ENOMEM;
1061+
}
10591062
}
1063+
10601064
sk_forward_alloc_add(sk, pages << PAGE_SHIFT);
10611065

10621066
WRITE_ONCE(sk->sk_reserved_mem,
@@ -3153,8 +3157,11 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
31533157
if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
31543158
return true;
31553159

3156-
sk_enter_memory_pressure(sk);
3160+
if (sk_should_enter_memory_pressure(sk))
3161+
sk_enter_memory_pressure(sk);
3162+
31573163
sk_stream_moderate_sndbuf(sk);
3164+
31583165
return false;
31593166
}
31603167
EXPORT_SYMBOL(sk_page_frag_refill);
@@ -3267,18 +3274,30 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
32673274
{
32683275
bool memcg_enabled = false, charged = false;
32693276
struct proto *prot = sk->sk_prot;
3270-
long allocated;
3271-
3272-
sk_memory_allocated_add(sk, amt);
3273-
allocated = sk_memory_allocated(sk);
3277+
long allocated = 0;
32743278

32753279
if (mem_cgroup_sk_enabled(sk)) {
3280+
bool exclusive = mem_cgroup_sk_exclusive(sk);
3281+
32763282
memcg_enabled = true;
32773283
charged = mem_cgroup_sk_charge(sk, amt, gfp_memcg_charge());
3278-
if (!charged)
3284+
3285+
if (exclusive && charged)
3286+
return 1;
3287+
3288+
if (!charged) {
3289+
if (!exclusive) {
3290+
sk_memory_allocated_add(sk, amt);
3291+
allocated = sk_memory_allocated(sk);
3292+
}
3293+
32793294
goto suppress_allocation;
3295+
}
32803296
}
32813297

3298+
sk_memory_allocated_add(sk, amt);
3299+
allocated = sk_memory_allocated(sk);
3300+
32823301
/* Under limit. */
32833302
if (allocated <= sk_prot_mem_limits(sk, 0)) {
32843303
sk_leave_memory_pressure(sk);
@@ -3357,7 +3376,8 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
33573376

33583377
trace_sock_exceed_buf_limit(sk, prot, allocated, kind);
33593378

3360-
sk_memory_allocated_sub(sk, amt);
3379+
if (allocated)
3380+
sk_memory_allocated_sub(sk, amt);
33613381

33623382
if (charged)
33633383
mem_cgroup_sk_uncharge(sk, amt);
@@ -3396,11 +3416,15 @@ EXPORT_SYMBOL(__sk_mem_schedule);
33963416
*/
33973417
void __sk_mem_reduce_allocated(struct sock *sk, int amount)
33983418
{
3399-
sk_memory_allocated_sub(sk, amount);
3400-
3401-
if (mem_cgroup_sk_enabled(sk))
3419+
if (mem_cgroup_sk_enabled(sk)) {
34023420
mem_cgroup_sk_uncharge(sk, amount);
34033421

3422+
if (mem_cgroup_sk_exclusive(sk))
3423+
return;
3424+
}
3425+
3426+
sk_memory_allocated_sub(sk, amount);
3427+
34043428
if (sk_under_global_memory_pressure(sk) &&
34053429
(sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
34063430
sk_leave_memory_pressure(sk);

net/ipv4/af_inet.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@
9595
#include <net/checksum.h>
9696
#include <net/ip.h>
9797
#include <net/protocol.h>
98+
#include <net/proto_memory.h>
9899
#include <net/arp.h>
99100
#include <net/route.h>
100101
#include <net/ip_fib.h>
@@ -768,8 +769,17 @@ void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *new
768769
*/
769770
amt = sk_mem_pages(newsk->sk_forward_alloc +
770771
atomic_read(&newsk->sk_rmem_alloc));
771-
if (amt)
772+
if (amt) {
773+
/* This amt is already charged globally to
774+
* sk_prot->memory_allocated due to lack of
775+
* sk_memcg until accept(), thus we need to
776+
* reclaim it here if newsk is isolated.
777+
*/
778+
if (mem_cgroup_sk_exclusive(newsk))
779+
sk_memory_allocated_sub(newsk, amt);
780+
772781
mem_cgroup_sk_charge(newsk, amt, gfp);
782+
}
773783
}
774784

775785
kmem_cache_charge(newsk, gfp);

net/ipv4/inet_connection_sock.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <net/tcp.h>
2323
#include <net/sock_reuseport.h>
2424
#include <net/addrconf.h>
25+
#include <net/proto_memory.h>
2526

2627
#if IS_ENABLED(CONFIG_IPV6)
2728
/* match_sk*_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses

net/ipv4/tcp.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -908,7 +908,8 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp,
908908
}
909909
__kfree_skb(skb);
910910
} else {
911-
sk->sk_prot->enter_memory_pressure(sk);
911+
if (sk_should_enter_memory_pressure(sk))
912+
tcp_enter_memory_pressure(sk);
912913
sk_stream_moderate_sndbuf(sk);
913914
}
914915
return NULL;

net/ipv4/tcp_output.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3574,12 +3574,18 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
35743574
delta = size - sk->sk_forward_alloc;
35753575
if (delta <= 0)
35763576
return;
3577+
35773578
amt = sk_mem_pages(delta);
35783579
sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
3579-
sk_memory_allocated_add(sk, amt);
35803580

3581-
if (mem_cgroup_sk_enabled(sk))
3581+
if (mem_cgroup_sk_enabled(sk)) {
35823582
mem_cgroup_sk_charge(sk, amt, gfp_memcg_charge() | __GFP_NOFAIL);
3583+
3584+
if (mem_cgroup_sk_exclusive(sk))
3585+
return;
3586+
}
3587+
3588+
sk_memory_allocated_add(sk, amt);
35833589
}
35843590

35853591
/* Send a FIN. The caller locks the socket for us.

net/mptcp/protocol.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <net/inet_common.h>
1717
#include <net/inet_hashtables.h>
1818
#include <net/protocol.h>
19+
#include <net/proto_memory.h>
1920
#include <net/tcp_states.h>
2021
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
2122
#include <net/transp_v6.h>
@@ -1016,7 +1017,7 @@ static void mptcp_enter_memory_pressure(struct sock *sk)
10161017
mptcp_for_each_subflow(msk, subflow) {
10171018
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
10181019

1019-
if (first)
1020+
if (first && sk_should_enter_memory_pressure(ssk))
10201021
tcp_enter_memory_pressure(ssk);
10211022
sk_stream_moderate_sndbuf(ssk);
10221023

net/tls/tls_device.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#include <linux/netdevice.h>
3636
#include <net/dst.h>
3737
#include <net/inet_connection_sock.h>
38+
#include <net/proto_memory.h>
3839
#include <net/tcp.h>
3940
#include <net/tls.h>
4041
#include <linux/skbuff_ref.h>
@@ -371,7 +372,8 @@ static int tls_do_allocation(struct sock *sk,
371372
if (!offload_ctx->open_record) {
372373
if (unlikely(!skb_page_frag_refill(prepend_size, pfrag,
373374
sk->sk_allocation))) {
374-
READ_ONCE(sk->sk_prot)->enter_memory_pressure(sk);
375+
if (sk_should_enter_memory_pressure(sk))
376+
READ_ONCE(sk->sk_prot)->enter_memory_pressure(sk);
375377
sk_stream_moderate_sndbuf(sk);
376378
return -ENOMEM;
377379
}

0 commit comments

Comments (0)