Skip to content

Commit a6f1906

Browse files
netoptimizer authored and kuba-moo committed
net: track pfmemalloc drops via SKB_DROP_REASON_PFMEMALLOC
Add a new SKB drop reason (SKB_DROP_REASON_PFMEMALLOC) to track packets dropped due to memory pressure. In production environments, we've observed memory exhaustion reported by memory layer stack traces, but these drops were not properly tracked in the SKB drop reason infrastructure.

While most network code paths now properly report pfmemalloc drops, some protocol-specific socket implementations still use sk_filter() without drop reason tracking:

- Bluetooth L2CAP sockets
- CAIF sockets
- IUCV sockets
- Netlink sockets
- SCTP sockets
- Unix domain sockets

These remaining cases represent less common paths and could be converted in a follow-up patch if needed.

The current implementation provides significantly improved observability into memory pressure events in the network stack, especially for key protocols like TCP and UDP, helping to diagnose problems in production environments.

Reported-by: Matt Fleming <[email protected]>
Signed-off-by: Jesper Dangaard Brouer <[email protected]>
Link: https://patch.msgid.link/175268316579.2407873.11634752355644843509.stgit@firesoul
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 8b7ab8e commit a6f1906

File tree

12 files changed

+75
-44
lines changed

12 files changed

+75
-44
lines changed

drivers/net/tun.c

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1002,8 +1002,8 @@ static unsigned int run_ebpf_filter(struct tun_struct *tun,
10021002
/* Net device start xmit */
10031003
static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
10041004
{
1005+
enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
10051006
struct tun_struct *tun = netdev_priv(dev);
1006-
enum skb_drop_reason drop_reason;
10071007
int txq = skb->queue_mapping;
10081008
struct netdev_queue *queue;
10091009
struct tun_file *tfile;
@@ -1032,10 +1032,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
10321032
}
10331033

10341034
if (tfile->socket.sk->sk_filter &&
1035-
sk_filter(tfile->socket.sk, skb)) {
1036-
drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1035+
sk_filter_reason(tfile->socket.sk, skb, &drop_reason))
10371036
goto drop;
1038-
}
10391037

10401038
len = run_ebpf_filter(tun, skb, len);
10411039
if (len == 0) {

include/linux/filter.h

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1073,10 +1073,20 @@ bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
10731073
return set_memory_rox((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
10741074
}
10751075

1076-
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
1076+
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap,
1077+
enum skb_drop_reason *reason);
1078+
10771079
static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
10781080
{
1079-
return sk_filter_trim_cap(sk, skb, 1);
1081+
enum skb_drop_reason ignore_reason;
1082+
1083+
return sk_filter_trim_cap(sk, skb, 1, &ignore_reason);
1084+
}
1085+
1086+
static inline int sk_filter_reason(struct sock *sk, struct sk_buff *skb,
1087+
enum skb_drop_reason *reason)
1088+
{
1089+
return sk_filter_trim_cap(sk, skb, 1, reason);
10801090
}
10811091

10821092
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err);

include/net/dropreason-core.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -125,6 +125,7 @@
125125
FN(CAN_RX_INVALID_FRAME) \
126126
FN(CANFD_RX_INVALID_FRAME) \
127127
FN(CANXL_RX_INVALID_FRAME) \
128+
FN(PFMEMALLOC) \
128129
FNe(MAX)
129130

130131
/**
@@ -598,6 +599,11 @@ enum skb_drop_reason {
598599
* non conform CAN-XL frame (or device is unable to receive CAN frames)
599600
*/
600601
SKB_DROP_REASON_CANXL_RX_INVALID_FRAME,
602+
/**
603+
* @SKB_DROP_REASON_PFMEMALLOC: packet allocated from memory reserve
604+
* reached a path or socket not eligible for use of memory reserves
605+
*/
606+
SKB_DROP_REASON_PFMEMALLOC,
601607
/**
602608
* @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
603609
* shouldn't be used as a real 'reason' - only for tracing code gen

include/net/tcp.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1559,7 +1559,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
15591559
enum skb_drop_reason *reason);
15601560

15611561

1562-
int tcp_filter(struct sock *sk, struct sk_buff *skb);
1562+
int tcp_filter(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason);
15631563
void tcp_set_state(struct sock *sk, int state);
15641564
void tcp_done(struct sock *sk);
15651565
int tcp_abort(struct sock *sk, int err);

net/core/dev.c

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5749,6 +5749,7 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
57495749
static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
57505750
struct packet_type **ppt_prev)
57515751
{
5752+
enum skb_drop_reason drop_reason = SKB_DROP_REASON_UNHANDLED_PROTO;
57525753
struct packet_type *ptype, *pt_prev;
57535754
rx_handler_func_t *rx_handler;
57545755
struct sk_buff *skb = *pskb;
@@ -5840,8 +5841,10 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
58405841
#endif
58415842
skb_reset_redirect(skb);
58425843
skip_classify:
5843-
if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
5844+
if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) {
5845+
drop_reason = SKB_DROP_REASON_PFMEMALLOC;
58445846
goto drop;
5847+
}
58455848

58465849
if (skb_vlan_tag_present(skb)) {
58475850
if (pt_prev) {
@@ -5946,7 +5949,8 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
59465949
dev_core_stats_rx_dropped_inc(skb->dev);
59475950
else
59485951
dev_core_stats_rx_nohandler_inc(skb->dev);
5949-
kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO);
5952+
5953+
kfree_skb_reason(skb, drop_reason);
59505954
/* Jamal, now you will not able to escape explaining
59515955
* me how you were going to use this. :-)
59525956
*/

net/core/filter.c

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -122,6 +122,7 @@ EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);
122122
* @sk: sock associated with &sk_buff
123123
* @skb: buffer to filter
124124
* @cap: limit on how short the eBPF program may trim the packet
125+
* @reason: record drop reason on errors (negative return value)
125126
*
126127
* Run the eBPF program and then cut skb->data to correct size returned by
127128
* the program. If pkt_len is 0 we toss packet. If skb->len is smaller
@@ -130,7 +131,8 @@ EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);
130131
* be accepted or -EPERM if the packet should be tossed.
131132
*
132133
*/
133-
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
134+
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb,
135+
unsigned int cap, enum skb_drop_reason *reason)
134136
{
135137
int err;
136138
struct sk_filter *filter;
@@ -142,15 +144,20 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
142144
*/
143145
if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
144146
NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
147+
*reason = SKB_DROP_REASON_PFMEMALLOC;
145148
return -ENOMEM;
146149
}
147150
err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
148-
if (err)
151+
if (err) {
152+
*reason = SKB_DROP_REASON_SOCKET_FILTER;
149153
return err;
154+
}
150155

151156
err = security_sock_rcv_skb(sk, skb);
152-
if (err)
157+
if (err) {
158+
*reason = SKB_DROP_REASON_SECURITY_HOOK;
153159
return err;
160+
}
154161

155162
rcu_read_lock();
156163
filter = rcu_dereference(sk->sk_filter);
@@ -162,6 +169,8 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
162169
pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
163170
skb->sk = save_sk;
164171
err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
172+
if (err)
173+
*reason = SKB_DROP_REASON_SOCKET_FILTER;
165174
}
166175
rcu_read_unlock();
167176

net/core/sock.c

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -526,11 +526,10 @@ int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb,
526526
enum skb_drop_reason drop_reason;
527527
int err;
528528

529-
err = sk_filter(sk, skb);
530-
if (err) {
531-
drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
529+
err = sk_filter_reason(sk, skb, &drop_reason);
530+
if (err)
532531
goto out;
533-
}
532+
534533
err = __sock_queue_rcv_skb(sk, skb);
535534
switch (err) {
536535
case -ENOMEM:
@@ -553,15 +552,18 @@ EXPORT_SYMBOL(sock_queue_rcv_skb_reason);
553552
int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
554553
const int nested, unsigned int trim_cap, bool refcounted)
555554
{
555+
enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
556556
int rc = NET_RX_SUCCESS;
557+
int err;
557558

558-
if (sk_filter_trim_cap(sk, skb, trim_cap))
559+
if (sk_filter_trim_cap(sk, skb, trim_cap, &reason))
559560
goto discard_and_relse;
560561

561562
skb->dev = NULL;
562563

563564
if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) {
564565
atomic_inc(&sk->sk_drops);
566+
reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
565567
goto discard_and_relse;
566568
}
567569
if (nested)
@@ -577,8 +579,12 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
577579
rc = sk_backlog_rcv(sk, skb);
578580

579581
mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
580-
} else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
582+
} else if ((err = sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))) {
581583
bh_unlock_sock(sk);
584+
if (err == -ENOMEM)
585+
reason = SKB_DROP_REASON_PFMEMALLOC;
586+
if (err == -ENOBUFS)
587+
reason = SKB_DROP_REASON_SOCKET_BACKLOG;
582588
atomic_inc(&sk->sk_drops);
583589
goto discard_and_relse;
584590
}
@@ -589,7 +595,7 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
589595
sock_put(sk);
590596
return rc;
591597
discard_and_relse:
592-
kfree_skb(skb);
598+
sk_skb_reason_drop(sk, skb, reason);
593599
goto out;
594600
}
595601
EXPORT_SYMBOL(__sk_receive_skb);

net/ipv4/tcp_ipv4.c

Lines changed: 15 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2026,6 +2026,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
20262026
u32 gso_size;
20272027
u64 limit;
20282028
int delta;
2029+
int err;
20292030

20302031
/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
20312032
* we can fix skb->truesize to its real value to avoid future drops.
@@ -2136,21 +2137,27 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
21362137

21372138
limit = min_t(u64, limit, UINT_MAX);
21382139

2139-
if (unlikely(sk_add_backlog(sk, skb, limit))) {
2140+
err = sk_add_backlog(sk, skb, limit);
2141+
if (unlikely(err)) {
21402142
bh_unlock_sock(sk);
2141-
*reason = SKB_DROP_REASON_SOCKET_BACKLOG;
2142-
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
2143+
if (err == -ENOMEM) {
2144+
*reason = SKB_DROP_REASON_PFMEMALLOC;
2145+
__NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
2146+
} else {
2147+
*reason = SKB_DROP_REASON_SOCKET_BACKLOG;
2148+
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
2149+
}
21432150
return true;
21442151
}
21452152
return false;
21462153
}
21472154
EXPORT_IPV6_MOD(tcp_add_backlog);
21482155

2149-
int tcp_filter(struct sock *sk, struct sk_buff *skb)
2156+
int tcp_filter(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason *reason)
21502157
{
21512158
struct tcphdr *th = (struct tcphdr *)skb->data;
21522159

2153-
return sk_filter_trim_cap(sk, skb, th->doff * 4);
2160+
return sk_filter_trim_cap(sk, skb, th->doff * 4, reason);
21542161
}
21552162
EXPORT_IPV6_MOD(tcp_filter);
21562163

@@ -2277,14 +2284,12 @@ int tcp_v4_rcv(struct sk_buff *skb)
22772284
}
22782285
refcounted = true;
22792286
nsk = NULL;
2280-
if (!tcp_filter(sk, skb)) {
2287+
if (!tcp_filter(sk, skb, &drop_reason)) {
22812288
th = (const struct tcphdr *)skb->data;
22822289
iph = ip_hdr(skb);
22832290
tcp_v4_fill_cb(skb, iph, th);
22842291
nsk = tcp_check_req(sk, skb, req, false, &req_stolen,
22852292
&drop_reason);
2286-
} else {
2287-
drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
22882293
}
22892294
if (!nsk) {
22902295
reqsk_put(req);
@@ -2340,10 +2345,9 @@ int tcp_v4_rcv(struct sk_buff *skb)
23402345

23412346
nf_reset_ct(skb);
23422347

2343-
if (tcp_filter(sk, skb)) {
2344-
drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
2348+
if (tcp_filter(sk, skb, &drop_reason))
23452349
goto discard_and_relse;
2346-
}
2350+
23472351
th = (const struct tcphdr *)skb->data;
23482352
iph = ip_hdr(skb);
23492353
tcp_v4_fill_cb(skb, iph, th);

net/ipv4/udp.c

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2347,7 +2347,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
23472347
*/
23482348
static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
23492349
{
2350-
int drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
2350+
enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
23512351
struct udp_sock *up = udp_sk(sk);
23522352
int is_udplite = IS_UDPLITE(sk);
23532353

@@ -2436,10 +2436,8 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
24362436
udp_lib_checksum_complete(skb))
24372437
goto csum_error;
24382438

2439-
if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) {
2440-
drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
2439+
if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr), &drop_reason))
24412440
goto drop;
2442-
}
24432441

24442442
udp_csum_pull_header(skb);
24452443

net/ipv6/tcp_ipv6.c

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1834,14 +1834,12 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
18341834
}
18351835
refcounted = true;
18361836
nsk = NULL;
1837-
if (!tcp_filter(sk, skb)) {
1837+
if (!tcp_filter(sk, skb, &drop_reason)) {
18381838
th = (const struct tcphdr *)skb->data;
18391839
hdr = ipv6_hdr(skb);
18401840
tcp_v6_fill_cb(skb, hdr, th);
18411841
nsk = tcp_check_req(sk, skb, req, false, &req_stolen,
18421842
&drop_reason);
1843-
} else {
1844-
drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
18451843
}
18461844
if (!nsk) {
18471845
reqsk_put(req);
@@ -1897,10 +1895,9 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
18971895

18981896
nf_reset_ct(skb);
18991897

1900-
if (tcp_filter(sk, skb)) {
1901-
drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
1898+
if (tcp_filter(sk, skb, &drop_reason))
19021899
goto discard_and_relse;
1903-
}
1900+
19041901
th = (const struct tcphdr *)skb->data;
19051902
hdr = ipv6_hdr(skb);
19061903
tcp_v6_fill_cb(skb, hdr, th);

0 commit comments

Comments
 (0)