Skip to content

Commit a9e6a75

Browse files
q2ven and Kernel Patches Daemon
authored and committed
bpf: Introduce SK_BPF_MEMCG_FLAGS and SK_BPF_MEMCG_SOCK_ISOLATED.
We will decouple sockets from the global protocol memory accounting if sockets have SK_BPF_MEMCG_SOCK_ISOLATED.

This can be flagged, during socket() or before sk->sk_memcg is set in accept(), via bpf_setsockopt():

    flags = SK_BPF_MEMCG_SOCK_ISOLATED;
    bpf_setsockopt(ctx, SOL_SOCKET, SK_BPF_MEMCG_FLAGS, &flags, sizeof(flags));

Given sk->sk_memcg can be accessed in the fast path, it would be preferable to place the flag field in the same cache line as sk->sk_memcg. However, struct sock does not have such a 1-byte hole.

Let's store the flag in the lowest bit of sk->sk_memcg and add a helper to check the bit.

In the next patch, if mem_cgroup_sk_isolated() returns true, the socket will not be charged to sk->sk_prot->memory_allocated.

The main targets are BPF_CGROUP_INET_SOCK_CREATE and BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, as demonstrated in the selftest.

Note that we do not support modifying the flag once sk->sk_memcg is set, especially because UDP charges memory under sk->sk_receive_queue.lock instead of lock_sock().

Signed-off-by: Kuniyuki Iwashima <[email protected]>
1 parent e933110 commit a9e6a75

File tree

5 files changed

+91
-1
lines changed

5 files changed

+91
-1
lines changed

include/net/sock.h

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2596,17 +2596,51 @@ static inline gfp_t gfp_memcg_charge(void)
25962596
return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
25972597
}
25982598

2599+
/*
 * sk->sk_memcg doubles as a tagged pointer: its low bits hold the
 * SK_BPF_MEMCG_* flags and the remaining bits hold the struct mem_cgroup
 * pointer.
 *
 * SK_BPF_MEMCG_FLAG_MAX is an enum constant and therefore has type int.
 * The pointer mask is built in unsigned long so that it is explicitly
 * pointer-width rather than relying on sign extension of an inverted int,
 * and both expansions are fully parenthesized.
 */
#define SK_BPF_MEMCG_FLAG_MASK	(SK_BPF_MEMCG_FLAG_MAX - 1)
#define SK_BPF_MEMCG_PTR_MASK	(~(unsigned long)SK_BPF_MEMCG_FLAG_MASK)
2601+
25992602
#ifdef CONFIG_MEMCG
2603+
/**
 * mem_cgroup_sk_set_flags - store SK_BPF_MEMCG_* flags in sk->sk_memcg
 * @sk: socket whose sk_memcg word is updated
 * @flags: flag bits to set; bits outside SK_BPF_MEMCG_FLAG_MASK are ignored
 *
 * The flags are OR-ed into the low bits of sk->sk_memcg, so bits that are
 * already set stay set.
 *
 * Only the flag bits are ever written: a stray higher bit in @flags must
 * not be able to alter the pointer part of the word.
 *
 * This is a no-op without CONFIG_CGROUP_BPF.  mem_cgroup_sk_get_flags()
 * and mem_cgroup_from_sk() only account for the flag bits when that option
 * is enabled, so tagging the word otherwise would hand out a pointer with
 * the flag bit still set.
 */
static inline void mem_cgroup_sk_set_flags(struct sock *sk, unsigned short flags)
{
#ifdef CONFIG_CGROUP_BPF
	unsigned long val = (unsigned long)sk->sk_memcg;

	val |= flags & SK_BPF_MEMCG_FLAG_MASK;
	sk->sk_memcg = (struct mem_cgroup *)val;
#endif
}
2610+
2611+
/**
 * mem_cgroup_sk_get_flags - read the SK_BPF_MEMCG_* flags of a socket
 * @sk: socket to inspect
 *
 * Return: the low bits of sk->sk_memcg selected by SK_BPF_MEMCG_FLAG_MASK
 * when CONFIG_CGROUP_BPF is enabled, 0 otherwise.
 */
static inline unsigned short mem_cgroup_sk_get_flags(const struct sock *sk)
{
	unsigned short flags = 0;

#ifdef CONFIG_CGROUP_BPF
	flags = (unsigned long)sk->sk_memcg & SK_BPF_MEMCG_FLAG_MASK;
#endif
	return flags;
}
2621+
26002622
static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk)
26012623
{
2624+
#ifdef CONFIG_CGROUP_BPF
2625+
unsigned long val = (unsigned long)sk->sk_memcg;
2626+
2627+
val &= SK_BPF_MEMCG_PTR_MASK;
2628+
return (struct mem_cgroup *)val;
2629+
#else
26022630
return sk->sk_memcg;
2631+
#endif
26032632
}
26042633

26052634
static inline bool mem_cgroup_sk_enabled(const struct sock *sk)
26062635
{
26072636
return mem_cgroup_sockets_enabled && mem_cgroup_from_sk(sk);
26082637
}
26092638

2639+
static inline bool mem_cgroup_sk_isolated(const struct sock *sk)
2640+
{
2641+
return mem_cgroup_sk_get_flags(sk) & SK_BPF_MEMCG_SOCK_ISOLATED;
2642+
}
2643+
26102644
static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk)
26112645
{
26122646
struct mem_cgroup *memcg = mem_cgroup_from_sk(sk);
@@ -2624,6 +2658,15 @@ static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk)
26242658
return false;
26252659
}
26262660
#else
2661+
/* !CONFIG_MEMCG stub: there is no memcg word to tag, so this does nothing. */
static inline void mem_cgroup_sk_set_flags(struct sock *sk, unsigned short flags)
{
}
2664+
2665+
/* !CONFIG_MEMCG stub: no flags can be stored, so always report none. */
static inline unsigned short mem_cgroup_sk_get_flags(const struct sock *sk)
{
	return 0;
}
2669+
26272670
static inline struct mem_cgroup *mem_cgroup_from_sk(const struct sock *sk)
26282671
{
26292672
return NULL;
@@ -2634,6 +2677,11 @@ static inline bool mem_cgroup_sk_enabled(const struct sock *sk)
26342677
return false;
26352678
}
26362679

2680+
/* !CONFIG_MEMCG stub: a socket is never reported as isolated. */
static inline bool mem_cgroup_sk_isolated(const struct sock *sk)
{
	return false;
}
2684+
26372685
static inline bool mem_cgroup_sk_under_memory_pressure(const struct sock *sk)
26382686
{
26392687
return false;

include/uapi/linux/bpf.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7182,6 +7182,7 @@ enum {
71827182
TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
71837183
TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
71847184
SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */
7185+
SK_BPF_MEMCG_FLAGS = 1010, /* Get or Set flags saved in sk->sk_memcg */
71857186
};
71867187

71877188
enum {
@@ -7204,6 +7205,11 @@ enum {
72047205
*/
72057206
};
72067207

7208+
/* Flag values for the SK_BPF_MEMCG_FLAGS socket option. */
enum {
	/* Mark the socket as isolated (stored in the low bit of sk->sk_memcg). */
	SK_BPF_MEMCG_SOCK_ISOLATED	= (1UL << 0),
	/* Upper bound of the flag space; not a settable flag itself. */
	SK_BPF_MEMCG_FLAG_MAX		= (1UL << 1),
};
7212+
72077213
struct bpf_perf_event_value {
72087214
__u64 counter;
72097215
__u64 enabled;

net/core/filter.c

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5267,6 +5267,27 @@ static int sk_bpf_set_get_cb_flags(struct sock *sk, char *optval, bool getopt)
52675267
return 0;
52685268
}
52695269

5270+
static int sk_bpf_set_get_memcg_flags(struct sock *sk, int *optval, bool getopt)
5271+
{
5272+
if (!sk_has_account(sk))
5273+
return -EOPNOTSUPP;
5274+
5275+
if (getopt) {
5276+
*optval = mem_cgroup_sk_get_flags(sk);
5277+
return 0;
5278+
}
5279+
5280+
if (sock_owned_by_user_nocheck(sk) && mem_cgroup_from_sk(sk))
5281+
return -EBUSY;
5282+
5283+
if (*optval <= 0 || *optval >= SK_BPF_MEMCG_FLAG_MAX)
5284+
return -EINVAL;
5285+
5286+
mem_cgroup_sk_set_flags(sk, *optval);
5287+
5288+
return 0;
5289+
}
5290+
52705291
static int sol_socket_sockopt(struct sock *sk, int optname,
52715292
char *optval, int *optlen,
52725293
bool getopt)
@@ -5284,6 +5305,7 @@ static int sol_socket_sockopt(struct sock *sk, int optname,
52845305
case SO_BINDTOIFINDEX:
52855306
case SO_TXREHASH:
52865307
case SK_BPF_CB_FLAGS:
5308+
case SK_BPF_MEMCG_FLAGS:
52875309
if (*optlen != sizeof(int))
52885310
return -EINVAL;
52895311
break;
@@ -5293,8 +5315,12 @@ static int sol_socket_sockopt(struct sock *sk, int optname,
52935315
return -EINVAL;
52945316
}
52955317

5296-
if (optname == SK_BPF_CB_FLAGS)
5318+
switch (optname) {
5319+
case SK_BPF_CB_FLAGS:
52975320
return sk_bpf_set_get_cb_flags(sk, optval, getopt);
5321+
case SK_BPF_MEMCG_FLAGS:
5322+
return sk_bpf_set_get_memcg_flags(sk, (int *)optval, getopt);
5323+
}
52985324

52995325
if (getopt) {
53005326
if (optname == SO_BINDTODEVICE)

net/ipv4/af_inet.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,12 +758,16 @@ void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *new
758758
(!IS_ENABLED(CONFIG_IP_SCTP) ||
759759
sk_is_tcp(newsk) || sk_is_mptcp(newsk))) {
760760
gfp_t gfp = GFP_KERNEL | __GFP_NOFAIL;
761+
unsigned short flags;
761762

763+
flags = mem_cgroup_sk_get_flags(newsk);
762764
mem_cgroup_sk_alloc(newsk);
763765

764766
if (mem_cgroup_from_sk(newsk)) {
765767
int amt;
766768

769+
mem_cgroup_sk_set_flags(newsk, flags);
770+
767771
/* The socket has not been accepted yet, no need
768772
* to look at newsk->sk_wmem_queued.
769773
*/

tools/include/uapi/linux/bpf.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7182,6 +7182,7 @@ enum {
71827182
TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
71837183
TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
71847184
SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */
7185+
SK_BPF_MEMCG_FLAGS = 1010, /* Get or Set flags saved in sk->sk_memcg */
71857186
};
71867187

71877188
enum {
@@ -7204,6 +7205,11 @@ enum {
72047205
*/
72057206
};
72067207

7208+
/* Flag values for the SK_BPF_MEMCG_FLAGS socket option. */
enum {
	/* Mark the socket as isolated (stored in the low bit of sk->sk_memcg). */
	SK_BPF_MEMCG_SOCK_ISOLATED	= (1UL << 0),
	/* Upper bound of the flag space; not a settable flag itself. */
	SK_BPF_MEMCG_FLAG_MAX		= (1UL << 1),
};
7212+
72077213
struct bpf_perf_event_value {
72087214
__u64 counter;
72097215
__u64 enabled;

0 commit comments

Comments
 (0)