Skip to content

Commit c343966

Browse files
committed
Florian Westphal says: ==================== netfilter: updates for net-next First patch gets rid of refcounting for dying list dumping, using a cookie value instead of keeping the object around. Remaining patches extend nftables pipapo (concatenated ranges) set type. Make the AVX2 optimized version available from the control plane as well, then use it during insert. This gives a nice speedup for large sets. All from myself. On PREEMPT_RT, we can't rely on local_bh_disable() to protect the access to the percpu scratch maps. Use nested-BH locking for this. From Sebastian Siewior. * tag 'nf-next-25-08-20' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next: netfilter: nft_set_pipapo: Use nested-BH locking for nft_pipapo_scratch netfilter: nft_set_pipapo: Store real pointer, adjust later. netfilter: nft_set_pipapo: use avx2 algorithm for insertions too netfilter: nft_set_pipapo_avx2: split lookup function in two parts netfilter: nft_set_pipapo_avx2: Drop the comment regarding protection netfilter: ctnetlink: remove refcounting in dying list dumping ==================== Link: https://patch.msgid.link/[email protected] Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2 parents dac7213 + 456010c commit c343966

File tree

5 files changed

+155
-124
lines changed

5 files changed

+155
-124
lines changed

net/netfilter/nf_conntrack_netlink.c

Lines changed: 10 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ MODULE_LICENSE("GPL");
6060
MODULE_DESCRIPTION("List and change connection tracking table");
6161

6262
struct ctnetlink_list_dump_ctx {
63-
struct nf_conn *last;
63+
unsigned long last_id;
6464
unsigned int cpu;
6565
bool done;
6666
};
@@ -1733,16 +1733,6 @@ static int ctnetlink_get_conntrack(struct sk_buff *skb,
17331733
return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
17341734
}
17351735

1736-
static int ctnetlink_done_list(struct netlink_callback *cb)
1737-
{
1738-
struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx;
1739-
1740-
if (ctx->last)
1741-
nf_ct_put(ctx->last);
1742-
1743-
return 0;
1744-
}
1745-
17461736
#ifdef CONFIG_NF_CONNTRACK_EVENTS
17471737
static int ctnetlink_dump_one_entry(struct sk_buff *skb,
17481738
struct netlink_callback *cb,
@@ -1757,11 +1747,11 @@ static int ctnetlink_dump_one_entry(struct sk_buff *skb,
17571747
if (l3proto && nf_ct_l3num(ct) != l3proto)
17581748
return 0;
17591749

1760-
if (ctx->last) {
1761-
if (ct != ctx->last)
1750+
if (ctx->last_id) {
1751+
if (ctnetlink_get_id(ct) != ctx->last_id)
17621752
return 0;
17631753

1764-
ctx->last = NULL;
1754+
ctx->last_id = 0;
17651755
}
17661756

17671757
/* We can't dump extension info for the unconfirmed
@@ -1775,12 +1765,8 @@ static int ctnetlink_dump_one_entry(struct sk_buff *skb,
17751765
cb->nlh->nlmsg_seq,
17761766
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
17771767
ct, dying, 0);
1778-
if (res < 0) {
1779-
if (!refcount_inc_not_zero(&ct->ct_general.use))
1780-
return 0;
1781-
1782-
ctx->last = ct;
1783-
}
1768+
if (res < 0)
1769+
ctx->last_id = ctnetlink_get_id(ct);
17841770

17851771
return res;
17861772
}
@@ -1796,18 +1782,18 @@ static int
17961782
ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
17971783
{
17981784
struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx;
1799-
struct nf_conn *last = ctx->last;
18001785
#ifdef CONFIG_NF_CONNTRACK_EVENTS
18011786
const struct net *net = sock_net(skb->sk);
18021787
struct nf_conntrack_net_ecache *ecache_net;
1788+
unsigned long last_id = ctx->last_id;
18031789
struct nf_conntrack_tuple_hash *h;
18041790
struct hlist_nulls_node *n;
18051791
#endif
18061792

18071793
if (ctx->done)
18081794
return 0;
18091795

1810-
ctx->last = NULL;
1796+
ctx->last_id = 0;
18111797

18121798
#ifdef CONFIG_NF_CONNTRACK_EVENTS
18131799
ecache_net = nf_conn_pernet_ecache(net);
@@ -1818,24 +1804,21 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
18181804
int res;
18191805

18201806
ct = nf_ct_tuplehash_to_ctrack(h);
1821-
if (last && last != ct)
1807+
if (last_id && last_id != ctnetlink_get_id(ct))
18221808
continue;
18231809

18241810
res = ctnetlink_dump_one_entry(skb, cb, ct, true);
18251811
if (res < 0) {
18261812
spin_unlock_bh(&ecache_net->dying_lock);
1827-
nf_ct_put(last);
18281813
return skb->len;
18291814
}
18301815

1831-
nf_ct_put(last);
1832-
last = NULL;
1816+
last_id = 0;
18331817
}
18341818

18351819
spin_unlock_bh(&ecache_net->dying_lock);
18361820
#endif
18371821
ctx->done = true;
1838-
nf_ct_put(last);
18391822

18401823
return skb->len;
18411824
}
@@ -1847,7 +1830,6 @@ static int ctnetlink_get_ct_dying(struct sk_buff *skb,
18471830
if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
18481831
struct netlink_dump_control c = {
18491832
.dump = ctnetlink_dump_dying,
1850-
.done = ctnetlink_done_list,
18511833
};
18521834
return netlink_dump_start(info->sk, skb, info->nlh, &c);
18531835
}
@@ -1862,7 +1844,6 @@ static int ctnetlink_get_ct_unconfirmed(struct sk_buff *skb,
18621844
if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
18631845
struct netlink_dump_control c = {
18641846
.dump = ctnetlink_dump_unconfirmed,
1865-
.done = ctnetlink_done_list,
18661847
};
18671848
return netlink_dump_start(info->sk, skb, info->nlh, &c);
18681849
}

net/netfilter/nft_set_pipapo.c

Lines changed: 53 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,7 @@ int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules,
397397
}
398398

399399
/**
400-
* pipapo_get() - Get matching element reference given key data
400+
* pipapo_get_slow() - Get matching element reference given key data
401401
* @m: storage containing the set elements
402402
* @data: Key data to be matched against existing elements
403403
* @genmask: If set, check that element is active in given genmask
@@ -414,12 +414,12 @@ int pipapo_refill(unsigned long *map, unsigned int len, unsigned int rules,
414414
*
415415
* Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise.
416416
*/
417-
static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m,
418-
const u8 *data, u8 genmask,
419-
u64 tstamp)
417+
static struct nft_pipapo_elem *pipapo_get_slow(const struct nft_pipapo_match *m,
418+
const u8 *data, u8 genmask,
419+
u64 tstamp)
420420
{
421+
unsigned long *res_map, *fill_map, *map;
421422
struct nft_pipapo_scratch *scratch;
422-
unsigned long *res_map, *fill_map;
423423
const struct nft_pipapo_field *f;
424424
bool map_index;
425425
int i;
@@ -429,11 +429,13 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m,
429429
scratch = *raw_cpu_ptr(m->scratch);
430430
if (unlikely(!scratch))
431431
goto out;
432+
__local_lock_nested_bh(&scratch->bh_lock);
432433

433434
map_index = scratch->map_index;
434435

435-
res_map = scratch->map + (map_index ? m->bsize_max : 0);
436-
fill_map = scratch->map + (map_index ? 0 : m->bsize_max);
436+
map = NFT_PIPAPO_LT_ALIGN(&scratch->__map[0]);
437+
res_map = map + (map_index ? m->bsize_max : 0);
438+
fill_map = map + (map_index ? 0 : m->bsize_max);
437439

438440
pipapo_resmap_init(m, res_map);
439441

@@ -464,6 +466,7 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m,
464466
last);
465467
if (b < 0) {
466468
scratch->map_index = map_index;
469+
__local_unlock_nested_bh(&scratch->bh_lock);
467470
local_bh_enable();
468471

469472
return NULL;
@@ -483,6 +486,7 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m,
483486
* *next* bitmap (not initial) for the next packet.
484487
*/
485488
scratch->map_index = map_index;
489+
__local_unlock_nested_bh(&scratch->bh_lock);
486490
local_bh_enable();
487491
return e;
488492
}
@@ -497,11 +501,47 @@ static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m,
497501
data += NFT_PIPAPO_GROUPS_PADDING(f);
498502
}
499503

504+
__local_unlock_nested_bh(&scratch->bh_lock);
500505
out:
501506
local_bh_enable();
502507
return NULL;
503508
}
504509

510+
/**
511+
* pipapo_get() - Get matching element reference given key data
512+
* @m: Storage containing the set elements
513+
* @data: Key data to be matched against existing elements
514+
* @genmask: If set, check that element is active in given genmask
515+
* @tstamp: Timestamp to check for expired elements
516+
*
517+
* This is a dispatcher function, either calling out the generic C
518+
* implementation or, if available, the AVX2 one.
519+
* This helper is only called from the control plane, with either RCU
520+
* read lock or transaction mutex held.
521+
*
522+
* Return: pointer to &struct nft_pipapo_elem on match, NULL otherwise.
523+
*/
524+
static struct nft_pipapo_elem *pipapo_get(const struct nft_pipapo_match *m,
525+
const u8 *data, u8 genmask,
526+
u64 tstamp)
527+
{
528+
struct nft_pipapo_elem *e;
529+
530+
local_bh_disable();
531+
532+
#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
533+
if (boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX) &&
534+
irq_fpu_usable()) {
535+
e = pipapo_get_avx2(m, data, genmask, tstamp);
536+
local_bh_enable();
537+
return e;
538+
}
539+
#endif
540+
e = pipapo_get_slow(m, data, genmask, tstamp);
541+
local_bh_enable();
542+
return e;
543+
}
544+
505545
/**
506546
* nft_pipapo_lookup() - Dataplane frontend for main lookup function
507547
* @net: Network namespace
@@ -523,7 +563,7 @@ nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
523563
const struct nft_pipapo_elem *e;
524564

525565
m = rcu_dereference(priv->match);
526-
e = pipapo_get(m, (const u8 *)key, genmask, get_jiffies_64());
566+
e = pipapo_get_slow(m, (const u8 *)key, genmask, get_jiffies_64());
527567

528568
return e ? &e->ext : NULL;
529569
}
@@ -1136,22 +1176,17 @@ static void pipapo_map(struct nft_pipapo_match *m,
11361176
}
11371177

11381178
/**
1139-
* pipapo_free_scratch() - Free per-CPU map at original (not aligned) address
1179+
* pipapo_free_scratch() - Free per-CPU map at original address
11401180
* @m: Matching data
11411181
* @cpu: CPU number
11421182
*/
11431183
static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu)
11441184
{
11451185
struct nft_pipapo_scratch *s;
1146-
void *mem;
11471186

11481187
s = *per_cpu_ptr(m->scratch, cpu);
1149-
if (!s)
1150-
return;
11511188

1152-
mem = s;
1153-
mem -= s->align_off;
1154-
kvfree(mem);
1189+
kvfree(s);
11551190
}
11561191

11571192
/**
@@ -1168,11 +1203,8 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
11681203

11691204
for_each_possible_cpu(i) {
11701205
struct nft_pipapo_scratch *scratch;
1171-
#ifdef NFT_PIPAPO_ALIGN
1172-
void *scratch_aligned;
1173-
u32 align_off;
1174-
#endif
1175-
scratch = kvzalloc_node(struct_size(scratch, map, bsize_max * 2) +
1206+
1207+
scratch = kvzalloc_node(struct_size(scratch, __map, bsize_max * 2) +
11761208
NFT_PIPAPO_ALIGN_HEADROOM,
11771209
GFP_KERNEL_ACCOUNT, cpu_to_node(i));
11781210
if (!scratch) {
@@ -1187,23 +1219,7 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
11871219
}
11881220

11891221
pipapo_free_scratch(clone, i);
1190-
1191-
#ifdef NFT_PIPAPO_ALIGN
1192-
/* Align &scratch->map (not the struct itself): the extra
1193-
* %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node()
1194-
* above guarantee we can waste up to those bytes in order
1195-
* to align the map field regardless of its offset within
1196-
* the struct.
1197-
*/
1198-
BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM);
1199-
1200-
scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map);
1201-
scratch_aligned -= offsetof(struct nft_pipapo_scratch, map);
1202-
align_off = scratch_aligned - (void *)scratch;
1203-
1204-
scratch = scratch_aligned;
1205-
scratch->align_off = align_off;
1206-
#endif
1222+
local_lock_init(&scratch->bh_lock);
12071223
*per_cpu_ptr(clone->scratch, i) = scratch;
12081224
}
12091225

net/netfilter/nft_set_pipapo.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,14 +124,14 @@ struct nft_pipapo_field {
124124

125125
/**
126126
* struct nft_pipapo_scratch - percpu data used for lookup and matching
127+
* @bh_lock: PREEMPT_RT local spinlock
127128
* @map_index: Current working bitmap index, toggled between field matches
128-
* @align_off: Offset to get the originally allocated address
129-
* @map: store partial matching results during lookup
129+
* @__map: store partial matching results during lookup
130130
*/
131131
struct nft_pipapo_scratch {
132+
local_lock_t bh_lock;
132133
u8 map_index;
133-
u32 align_off;
134-
unsigned long map[];
134+
unsigned long __map[];
135135
};
136136

137137
/**

0 commit comments

Comments
 (0)