Skip to content

Commit b2d6664

Browse files
committed
Daniel Borkmann says:

====================
pull-request: bpf 2023-11-21

We've added 19 non-merge commits during the last 4 day(s) which contain
a total of 18 files changed, 1043 insertions(+), 416 deletions(-).

The main changes are:

1) Fix BPF verifier to validate callbacks as if they are called an unknown
   number of times in order to fix not detecting some unsafe programs,
   from Eduard Zingerman.

2) Fix bpf_redirect_peer() handling which missed proper stats accounting
   for veth and netkit and also generally fix missing stats for the latter,
   from Peilin Ye, Daniel Borkmann et al.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  selftests/bpf: check if max number of bpf_loop iterations is tracked
  bpf: keep track of max number of bpf_loop callback iterations
  selftests/bpf: test widening for iterating callbacks
  bpf: widening for callback iterators
  selftests/bpf: tests for iterating callbacks
  bpf: verify callbacks as if they are called unknown number of times
  bpf: extract setup_func_entry() utility function
  bpf: extract __check_reg_arg() utility function
  selftests/bpf: fix bpf_loop_bench for new callback verification scheme
  selftests/bpf: track string payload offset as scalar in strobemeta
  selftests/bpf: track tcp payload offset as scalar in xdp_synproxy
  selftests/bpf: Add netkit to tc_redirect selftest
  selftests/bpf: De-veth-ize the tc_redirect test case
  bpf, netkit: Add indirect call wrapper for fetching peer dev
  bpf: Fix dev's rx stats for bpf_redirect_peer traffic
  veth: Use tstats per-CPU traffic counters
  netkit: Add tstats per-CPU traffic counters
  net: Move {l,t,d}stats allocation to core and convert veth & vrf
  net, vrf: Move dstats structure to core
====================

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
2 parents 495ec91 + acb12c8 commit b2d6664

File tree

18 files changed

+1043
-416
lines changed

18 files changed

+1043
-416
lines changed

drivers/net/netkit.c

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <linux/filter.h>
88
#include <linux/netfilter_netdev.h>
99
#include <linux/bpf_mprog.h>
10+
#include <linux/indirect_call_wrapper.h>
1011

1112
#include <net/netkit.h>
1213
#include <net/dst.h>
@@ -68,6 +69,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
6869
netdev_tx_t ret_dev = NET_XMIT_SUCCESS;
6970
const struct bpf_mprog_entry *entry;
7071
struct net_device *peer;
72+
int len = skb->len;
7173

7274
rcu_read_lock();
7375
peer = rcu_dereference(nk->peer);
@@ -85,15 +87,22 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
8587
case NETKIT_PASS:
8688
skb->protocol = eth_type_trans(skb, skb->dev);
8789
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
88-
__netif_rx(skb);
90+
if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) {
91+
dev_sw_netstats_tx_add(dev, 1, len);
92+
dev_sw_netstats_rx_add(peer, len);
93+
} else {
94+
goto drop_stats;
95+
}
8996
break;
9097
case NETKIT_REDIRECT:
98+
dev_sw_netstats_tx_add(dev, 1, len);
9199
skb_do_redirect(skb);
92100
break;
93101
case NETKIT_DROP:
94102
default:
95103
drop:
96104
kfree_skb(skb);
105+
drop_stats:
97106
dev_core_stats_tx_dropped_inc(dev);
98107
ret_dev = NET_XMIT_DROP;
99108
break;
@@ -169,11 +178,18 @@ static void netkit_set_headroom(struct net_device *dev, int headroom)
169178
rcu_read_unlock();
170179
}
171180

172-
static struct net_device *netkit_peer_dev(struct net_device *dev)
181+
INDIRECT_CALLABLE_SCOPE struct net_device *netkit_peer_dev(struct net_device *dev)
173182
{
174183
return rcu_dereference(netkit_priv(dev)->peer);
175184
}
176185

186+
static void netkit_get_stats(struct net_device *dev,
187+
struct rtnl_link_stats64 *stats)
188+
{
189+
dev_fetch_sw_netstats(stats, dev->tstats);
190+
stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
191+
}
192+
177193
static void netkit_uninit(struct net_device *dev);
178194

179195
static const struct net_device_ops netkit_netdev_ops = {
@@ -184,6 +200,7 @@ static const struct net_device_ops netkit_netdev_ops = {
184200
.ndo_set_rx_headroom = netkit_set_headroom,
185201
.ndo_get_iflink = netkit_get_iflink,
186202
.ndo_get_peer_dev = netkit_peer_dev,
203+
.ndo_get_stats64 = netkit_get_stats,
187204
.ndo_uninit = netkit_uninit,
188205
.ndo_features_check = passthru_features_check,
189206
};
@@ -218,6 +235,7 @@ static void netkit_setup(struct net_device *dev)
218235

219236
ether_setup(dev);
220237
dev->max_mtu = ETH_MAX_MTU;
238+
dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
221239

222240
dev->flags |= IFF_NOARP;
223241
dev->priv_flags &= ~IFF_TX_SKB_SHARING;

drivers/net/veth.c

Lines changed: 12 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
373373
skb_tx_timestamp(skb);
374374
if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
375375
if (!use_napi)
376-
dev_lstats_add(dev, length);
376+
dev_sw_netstats_tx_add(dev, 1, length);
377377
else
378378
__veth_xdp_flush(rq);
379379
} else {
@@ -387,14 +387,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
387387
return ret;
388388
}
389389

390-
static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
391-
{
392-
struct veth_priv *priv = netdev_priv(dev);
393-
394-
dev_lstats_read(dev, packets, bytes);
395-
return atomic64_read(&priv->dropped);
396-
}
397-
398390
static void veth_stats_rx(struct veth_stats *result, struct net_device *dev)
399391
{
400392
struct veth_priv *priv = netdev_priv(dev);
@@ -432,24 +424,24 @@ static void veth_get_stats64(struct net_device *dev,
432424
struct veth_priv *priv = netdev_priv(dev);
433425
struct net_device *peer;
434426
struct veth_stats rx;
435-
u64 packets, bytes;
436427

437-
tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes);
438-
tot->tx_bytes = bytes;
439-
tot->tx_packets = packets;
428+
tot->tx_dropped = atomic64_read(&priv->dropped);
429+
dev_fetch_sw_netstats(tot, dev->tstats);
440430

441431
veth_stats_rx(&rx, dev);
442432
tot->tx_dropped += rx.xdp_tx_err;
443433
tot->rx_dropped = rx.rx_drops + rx.peer_tq_xdp_xmit_err;
444-
tot->rx_bytes = rx.xdp_bytes;
445-
tot->rx_packets = rx.xdp_packets;
434+
tot->rx_bytes += rx.xdp_bytes;
435+
tot->rx_packets += rx.xdp_packets;
446436

447437
rcu_read_lock();
448438
peer = rcu_dereference(priv->peer);
449439
if (peer) {
450-
veth_stats_tx(peer, &packets, &bytes);
451-
tot->rx_bytes += bytes;
452-
tot->rx_packets += packets;
440+
struct rtnl_link_stats64 tot_peer = {};
441+
442+
dev_fetch_sw_netstats(&tot_peer, peer->tstats);
443+
tot->rx_bytes += tot_peer.tx_bytes;
444+
tot->rx_packets += tot_peer.tx_packets;
453445

454446
veth_stats_rx(&rx, peer);
455447
tot->tx_dropped += rx.peer_tq_xdp_xmit_err;
@@ -1506,25 +1498,12 @@ static void veth_free_queues(struct net_device *dev)
15061498

15071499
static int veth_dev_init(struct net_device *dev)
15081500
{
1509-
int err;
1510-
1511-
dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
1512-
if (!dev->lstats)
1513-
return -ENOMEM;
1514-
1515-
err = veth_alloc_queues(dev);
1516-
if (err) {
1517-
free_percpu(dev->lstats);
1518-
return err;
1519-
}
1520-
1521-
return 0;
1501+
return veth_alloc_queues(dev);
15221502
}
15231503

15241504
static void veth_dev_free(struct net_device *dev)
15251505
{
15261506
veth_free_queues(dev);
1527-
free_percpu(dev->lstats);
15281507
}
15291508

15301509
#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -1796,6 +1775,7 @@ static void veth_setup(struct net_device *dev)
17961775
NETIF_F_HW_VLAN_STAG_RX);
17971776
dev->needs_free_netdev = true;
17981777
dev->priv_destructor = veth_dev_free;
1778+
dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
17991779
dev->max_mtu = ETH_MAX_MTU;
18001780

18011781
dev->hw_features = VETH_FEATURES;

drivers/net/vrf.c

Lines changed: 10 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -121,22 +121,12 @@ struct net_vrf {
121121
int ifindex;
122122
};
123123

124-
struct pcpu_dstats {
125-
u64 tx_pkts;
126-
u64 tx_bytes;
127-
u64 tx_drps;
128-
u64 rx_pkts;
129-
u64 rx_bytes;
130-
u64 rx_drps;
131-
struct u64_stats_sync syncp;
132-
};
133-
134124
static void vrf_rx_stats(struct net_device *dev, int len)
135125
{
136126
struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
137127

138128
u64_stats_update_begin(&dstats->syncp);
139-
dstats->rx_pkts++;
129+
dstats->rx_packets++;
140130
dstats->rx_bytes += len;
141131
u64_stats_update_end(&dstats->syncp);
142132
}
@@ -161,10 +151,10 @@ static void vrf_get_stats64(struct net_device *dev,
161151
do {
162152
start = u64_stats_fetch_begin(&dstats->syncp);
163153
tbytes = dstats->tx_bytes;
164-
tpkts = dstats->tx_pkts;
165-
tdrops = dstats->tx_drps;
154+
tpkts = dstats->tx_packets;
155+
tdrops = dstats->tx_drops;
166156
rbytes = dstats->rx_bytes;
167-
rpkts = dstats->rx_pkts;
157+
rpkts = dstats->rx_packets;
168158
} while (u64_stats_fetch_retry(&dstats->syncp, start));
169159
stats->tx_bytes += tbytes;
170160
stats->tx_packets += tpkts;
@@ -421,7 +411,7 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev,
421411
if (likely(__netif_rx(skb) == NET_RX_SUCCESS))
422412
vrf_rx_stats(dev, len);
423413
else
424-
this_cpu_inc(dev->dstats->rx_drps);
414+
this_cpu_inc(dev->dstats->rx_drops);
425415

426416
return NETDEV_TX_OK;
427417
}
@@ -616,11 +606,11 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
616606
struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);
617607

618608
u64_stats_update_begin(&dstats->syncp);
619-
dstats->tx_pkts++;
609+
dstats->tx_packets++;
620610
dstats->tx_bytes += len;
621611
u64_stats_update_end(&dstats->syncp);
622612
} else {
623-
this_cpu_inc(dev->dstats->tx_drps);
613+
this_cpu_inc(dev->dstats->tx_drops);
624614
}
625615

626616
return ret;
@@ -1174,22 +1164,15 @@ static void vrf_dev_uninit(struct net_device *dev)
11741164

11751165
vrf_rtable_release(dev, vrf);
11761166
vrf_rt6_release(dev, vrf);
1177-
1178-
free_percpu(dev->dstats);
1179-
dev->dstats = NULL;
11801167
}
11811168

11821169
static int vrf_dev_init(struct net_device *dev)
11831170
{
11841171
struct net_vrf *vrf = netdev_priv(dev);
11851172

1186-
dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats);
1187-
if (!dev->dstats)
1188-
goto out_nomem;
1189-
11901173
/* create the default dst which points back to us */
11911174
if (vrf_rtable_create(dev) != 0)
1192-
goto out_stats;
1175+
goto out_nomem;
11931176

11941177
if (vrf_rt6_create(dev) != 0)
11951178
goto out_rth;
@@ -1203,9 +1186,6 @@ static int vrf_dev_init(struct net_device *dev)
12031186

12041187
out_rth:
12051188
vrf_rtable_release(dev, vrf);
1206-
out_stats:
1207-
free_percpu(dev->dstats);
1208-
dev->dstats = NULL;
12091189
out_nomem:
12101190
return -ENOMEM;
12111191
}
@@ -1704,6 +1684,8 @@ static void vrf_setup(struct net_device *dev)
17041684
dev->min_mtu = IPV6_MIN_MTU;
17051685
dev->max_mtu = IP6_MAX_MTU;
17061686
dev->mtu = dev->max_mtu;
1687+
1688+
dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
17071689
}
17081690

17091691
static int vrf_validate(struct nlattr *tb[], struct nlattr *data[],

include/linux/bpf_verifier.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,17 @@ struct bpf_func_state {
301301
struct tnum callback_ret_range;
302302
bool in_async_callback_fn;
303303
bool in_exception_callback_fn;
304+
/* For callback calling functions that limit number of possible
305+
* callback executions (e.g. bpf_loop) keeps track of current
306+
* simulated iteration number.
307+
* Value in frame N refers to number of times callback with frame
308+
* N+1 was simulated, e.g. for the following call:
309+
*
310+
* bpf_loop(..., fn, ...); | suppose current frame is N
311+
* | fn would be simulated in frame N+1
312+
* | number of simulations is tracked in frame N
313+
*/
314+
u32 callback_depth;
304315

305316
/* The following fields should be last. See copy_func_state() */
306317
int acquired_refs;
@@ -400,6 +411,7 @@ struct bpf_verifier_state {
400411
struct bpf_idx_pair *jmp_history;
401412
u32 jmp_history_cnt;
402413
u32 dfs_depth;
414+
u32 callback_unroll_depth;
403415
};
404416

405417
#define bpf_get_spilled_reg(slot, frame, mask) \
@@ -511,6 +523,10 @@ struct bpf_insn_aux_data {
511523
* this instruction, regardless of any heuristics
512524
*/
513525
bool force_checkpoint;
526+
/* true if instruction is a call to a helper function that
527+
* accepts callback function as a parameter.
528+
*/
529+
bool calls_callback;
514530
};
515531

516532
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */

include/linux/netdevice.h

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1797,6 +1797,13 @@ enum netdev_ml_priv_type {
17971797
ML_PRIV_CAN,
17981798
};
17991799

1800+
enum netdev_stat_type {
1801+
NETDEV_PCPU_STAT_NONE,
1802+
NETDEV_PCPU_STAT_LSTATS, /* struct pcpu_lstats */
1803+
NETDEV_PCPU_STAT_TSTATS, /* struct pcpu_sw_netstats */
1804+
NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */
1805+
};
1806+
18001807
/**
18011808
* struct net_device - The DEVICE structure.
18021809
*
@@ -1991,10 +1998,14 @@ enum netdev_ml_priv_type {
19911998
*
19921999
* @ml_priv: Mid-layer private
19932000
* @ml_priv_type: Mid-layer private type
1994-
* @lstats: Loopback statistics
1995-
* @tstats: Tunnel statistics
1996-
* @dstats: Dummy statistics
1997-
* @vstats: Virtual ethernet statistics
2001+
*
2002+
* @pcpu_stat_type: Type of device statistics which the core should
2003+
* allocate/free: none, lstats, tstats, dstats. none
2004+
* means the driver is handling statistics allocation/
2005+
* freeing internally.
2006+
* @lstats: Loopback statistics: packets, bytes
2007+
* @tstats: Tunnel statistics: RX/TX packets, RX/TX bytes
2008+
* @dstats: Dummy statistics: RX/TX/drop packets, RX/TX bytes
19982009
*
19992010
* @garp_port: GARP
20002011
* @mrp_port: MRP
@@ -2354,6 +2365,7 @@ struct net_device {
23542365
void *ml_priv;
23552366
enum netdev_ml_priv_type ml_priv_type;
23562367

2368+
enum netdev_stat_type pcpu_stat_type:8;
23572369
union {
23582370
struct pcpu_lstats __percpu *lstats;
23592371
struct pcpu_sw_netstats __percpu *tstats;
@@ -2755,6 +2767,16 @@ struct pcpu_sw_netstats {
27552767
struct u64_stats_sync syncp;
27562768
} __aligned(4 * sizeof(u64));
27572769

2770+
struct pcpu_dstats {
2771+
u64 rx_packets;
2772+
u64 rx_bytes;
2773+
u64 rx_drops;
2774+
u64 tx_packets;
2775+
u64 tx_bytes;
2776+
u64 tx_drops;
2777+
struct u64_stats_sync syncp;
2778+
} __aligned(8 * sizeof(u64));
2779+
27582780
struct pcpu_lstats {
27592781
u64_stats_t packets;
27602782
u64_stats_t bytes;

include/net/netkit.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog);
1010
int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
1111
int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog);
1212
int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr);
13+
INDIRECT_CALLABLE_DECLARE(struct net_device *netkit_peer_dev(struct net_device *dev));
1314
#else
1415
static inline int netkit_prog_attach(const union bpf_attr *attr,
1516
struct bpf_prog *prog)
@@ -34,5 +35,10 @@ static inline int netkit_prog_query(const union bpf_attr *attr,
3435
{
3536
return -EINVAL;
3637
}
38+
39+
static inline struct net_device *netkit_peer_dev(struct net_device *dev)
40+
{
41+
return NULL;
42+
}
3743
#endif /* CONFIG_NETKIT */
3844
#endif /* __NET_NETKIT_H */

0 commit comments

Comments
 (0)