Skip to content

Commit 3a1a66d

Browse files
committed
Florian Westpha says: ==================== netfilter pull request nf-25-09-10 First patch adds a lockdep annotation for a false-positive splat. Last patch adds formal reviewer tag for Phil Sutter to MAINTAINERS. Rest of the patches resolve spurious false negative results during set lookups while another CPU is processing a transaction. This has been broken at least since v4.18 when an unconditional synchronize_rcu call was removed from the commit phase of nf_tables. Quoting from Stefan Hanreichs original report: It seems like we've found an issue with atomicity when reloading nftables rulesets. Sometimes there is a small window where rules containing sets do not seem to apply to incoming traffic, due to the set apparently being empty for a short amount of time when flushing / adding elements. Exanple ruleset: table ip filter { set match { type ipv4_addr flags interval elements = { 0.0.0.0-192.168.2.19, 192.168.2.21-255.255.255.255 } } chain pre { type filter hook prerouting priority filter; policy accept; ip saddr @match accept counter comment "must never match" } } Reproducer transaction: while true: nft -f -<<EOF flush set ip filter match create element ip filter match { \ 0.0.0.0-192.168.2.19, 192.168.2.21-255.255.255.255 } EOF done Then create traffic. to/from e.g. 192.168.2.1 to 192.168.3.10. Once in a while the counter will increment even though the 'ip saddr @match' rule should have accepted the packet. See individual patches for details. Thanks to Stefan Hanreich for an initial description and reproducer for this bug and to Pablo Neira Ayuso for reviewing earlier iterations of the patchset. * tag 'nf-25-09-10-v2' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf: MAINTAINERS: add Phil as netfilter reviewer netfilter: nf_tables: restart set lookup on base_seq change netfilter: nf_tables: make nft_set_do_lookup available unconditionally netfilter: nf_tables: place base_seq in struct net netfilter: nft_set_rbtree: continue traversal if element is inactive netfilter: nft_set_pipapo: don't check genbit from packetpath lookups netfilter: nft_set_bitmap: fix lockdep splat due to missing annotation ==================== Link: https://patch.msgid.link/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
2 parents ccf78f7 + 37a9675 commit 3a1a66d

File tree

10 files changed

+103
-55
lines changed

10 files changed

+103
-55
lines changed

MAINTAINERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17480,6 +17480,7 @@ NETFILTER
1748017480
M: Pablo Neira Ayuso <[email protected]>
1748117481
M: Jozsef Kadlecsik <[email protected]>
1748217482
M: Florian Westphal <[email protected]>
17483+
R: Phil Sutter <[email protected]>
1748317484
1748417485
1748517486
S: Maintained

include/net/netfilter/nf_tables.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1912,7 +1912,6 @@ struct nftables_pernet {
19121912
struct mutex commit_mutex;
19131913
u64 table_handle;
19141914
u64 tstamp;
1915-
unsigned int base_seq;
19161915
unsigned int gc_seq;
19171916
u8 validate_state;
19181917
struct work_struct destroy_work;

include/net/netfilter/nf_tables_core.h

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -109,17 +109,11 @@ nft_hash_lookup_fast(const struct net *net, const struct nft_set *set,
109109
const struct nft_set_ext *
110110
nft_hash_lookup(const struct net *net, const struct nft_set *set,
111111
const u32 *key);
112+
#endif
113+
112114
const struct nft_set_ext *
113115
nft_set_do_lookup(const struct net *net, const struct nft_set *set,
114116
const u32 *key);
115-
#else
116-
static inline const struct nft_set_ext *
117-
nft_set_do_lookup(const struct net *net, const struct nft_set *set,
118-
const u32 *key)
119-
{
120-
return set->ops->lookup(net, set, key);
121-
}
122-
#endif
123117

124118
/* called from nft_pipapo_avx2.c */
125119
const struct nft_set_ext *

include/net/netns/nftables.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#define _NETNS_NFTABLES_H_
44

55
struct netns_nftables {
6+
unsigned int base_seq;
67
u8 gencursor;
78
};
89

net/netfilter/nf_tables_api.c

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1131,11 +1131,14 @@ nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla,
11311131
return ERR_PTR(-ENOENT);
11321132
}
11331133

1134-
static __be16 nft_base_seq(const struct net *net)
1134+
static unsigned int nft_base_seq(const struct net *net)
11351135
{
1136-
struct nftables_pernet *nft_net = nft_pernet(net);
1136+
return READ_ONCE(net->nft.base_seq);
1137+
}
11371138

1138-
return htons(nft_net->base_seq & 0xffff);
1139+
static __be16 nft_base_seq_be16(const struct net *net)
1140+
{
1141+
return htons(nft_base_seq(net) & 0xffff);
11391142
}
11401143

11411144
static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
@@ -1155,7 +1158,7 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
11551158

11561159
nlh = nfnl_msg_put(skb, portid, seq,
11571160
nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
1158-
flags, family, NFNETLINK_V0, nft_base_seq(net));
1161+
flags, family, NFNETLINK_V0, nft_base_seq_be16(net));
11591162
if (!nlh)
11601163
goto nla_put_failure;
11611164

@@ -1248,7 +1251,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
12481251

12491252
rcu_read_lock();
12501253
nft_net = nft_pernet(net);
1251-
cb->seq = READ_ONCE(nft_net->base_seq);
1254+
cb->seq = nft_base_seq(net);
12521255

12531256
list_for_each_entry_rcu(table, &nft_net->tables, list) {
12541257
if (family != NFPROTO_UNSPEC && family != table->family)
@@ -2030,7 +2033,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
20302033

20312034
nlh = nfnl_msg_put(skb, portid, seq,
20322035
nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
2033-
flags, family, NFNETLINK_V0, nft_base_seq(net));
2036+
flags, family, NFNETLINK_V0, nft_base_seq_be16(net));
20342037
if (!nlh)
20352038
goto nla_put_failure;
20362039

@@ -2133,7 +2136,7 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
21332136

21342137
rcu_read_lock();
21352138
nft_net = nft_pernet(net);
2136-
cb->seq = READ_ONCE(nft_net->base_seq);
2139+
cb->seq = nft_base_seq(net);
21372140

21382141
list_for_each_entry_rcu(table, &nft_net->tables, list) {
21392142
if (family != NFPROTO_UNSPEC && family != table->family)
@@ -3671,7 +3674,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
36713674
u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
36723675

36733676
nlh = nfnl_msg_put(skb, portid, seq, type, flags, family, NFNETLINK_V0,
3674-
nft_base_seq(net));
3677+
nft_base_seq_be16(net));
36753678
if (!nlh)
36763679
goto nla_put_failure;
36773680

@@ -3839,7 +3842,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
38393842

38403843
rcu_read_lock();
38413844
nft_net = nft_pernet(net);
3842-
cb->seq = READ_ONCE(nft_net->base_seq);
3845+
cb->seq = nft_base_seq(net);
38433846

38443847
list_for_each_entry_rcu(table, &nft_net->tables, list) {
38453848
if (family != NFPROTO_UNSPEC && family != table->family)
@@ -4050,7 +4053,7 @@ static int nf_tables_getrule_reset(struct sk_buff *skb,
40504053
buf = kasprintf(GFP_ATOMIC, "%.*s:%u",
40514054
nla_len(nla[NFTA_RULE_TABLE]),
40524055
(char *)nla_data(nla[NFTA_RULE_TABLE]),
4053-
nft_net->base_seq);
4056+
nft_base_seq(net));
40544057
audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1,
40554058
AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC);
40564059
kfree(buf);
@@ -4887,7 +4890,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
48874890
nlh = nfnl_msg_put(skb, portid, seq,
48884891
nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
48894892
flags, ctx->family, NFNETLINK_V0,
4890-
nft_base_seq(ctx->net));
4893+
nft_base_seq_be16(ctx->net));
48914894
if (!nlh)
48924895
goto nla_put_failure;
48934896

@@ -5032,7 +5035,7 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
50325035

50335036
rcu_read_lock();
50345037
nft_net = nft_pernet(net);
5035-
cb->seq = READ_ONCE(nft_net->base_seq);
5038+
cb->seq = nft_base_seq(net);
50365039

50375040
list_for_each_entry_rcu(table, &nft_net->tables, list) {
50385041
if (ctx->family != NFPROTO_UNSPEC &&
@@ -6209,7 +6212,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
62096212

62106213
rcu_read_lock();
62116214
nft_net = nft_pernet(net);
6212-
cb->seq = READ_ONCE(nft_net->base_seq);
6215+
cb->seq = nft_base_seq(net);
62136216

62146217
list_for_each_entry_rcu(table, &nft_net->tables, list) {
62156218
if (dump_ctx->ctx.family != NFPROTO_UNSPEC &&
@@ -6238,7 +6241,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
62386241
seq = cb->nlh->nlmsg_seq;
62396242

62406243
nlh = nfnl_msg_put(skb, portid, seq, event, NLM_F_MULTI,
6241-
table->family, NFNETLINK_V0, nft_base_seq(net));
6244+
table->family, NFNETLINK_V0, nft_base_seq_be16(net));
62426245
if (!nlh)
62436246
goto nla_put_failure;
62446247

@@ -6331,7 +6334,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
63316334

63326335
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
63336336
nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family,
6334-
NFNETLINK_V0, nft_base_seq(ctx->net));
6337+
NFNETLINK_V0, nft_base_seq_be16(ctx->net));
63356338
if (!nlh)
63366339
goto nla_put_failure;
63376340

@@ -6630,7 +6633,7 @@ static int nf_tables_getsetelem_reset(struct sk_buff *skb,
66306633
}
66316634
nelems++;
66326635
}
6633-
audit_log_nft_set_reset(dump_ctx.ctx.table, nft_net->base_seq, nelems);
6636+
audit_log_nft_set_reset(dump_ctx.ctx.table, nft_base_seq(info->net), nelems);
66346637

66356638
out_unlock:
66366639
rcu_read_unlock();
@@ -8381,7 +8384,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
83818384

83828385
nlh = nfnl_msg_put(skb, portid, seq,
83838386
nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
8384-
flags, family, NFNETLINK_V0, nft_base_seq(net));
8387+
flags, family, NFNETLINK_V0, nft_base_seq_be16(net));
83858388
if (!nlh)
83868389
goto nla_put_failure;
83878390

@@ -8446,7 +8449,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
84468449

84478450
rcu_read_lock();
84488451
nft_net = nft_pernet(net);
8449-
cb->seq = READ_ONCE(nft_net->base_seq);
8452+
cb->seq = nft_base_seq(net);
84508453

84518454
list_for_each_entry_rcu(table, &nft_net->tables, list) {
84528455
if (family != NFPROTO_UNSPEC && family != table->family)
@@ -8480,7 +8483,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
84808483
idx++;
84818484
}
84828485
if (ctx->reset && entries)
8483-
audit_log_obj_reset(table, nft_net->base_seq, entries);
8486+
audit_log_obj_reset(table, nft_base_seq(net), entries);
84848487
if (rc < 0)
84858488
break;
84868489
}
@@ -8649,7 +8652,7 @@ static int nf_tables_getobj_reset(struct sk_buff *skb,
86498652
buf = kasprintf(GFP_ATOMIC, "%.*s:%u",
86508653
nla_len(nla[NFTA_OBJ_TABLE]),
86518654
(char *)nla_data(nla[NFTA_OBJ_TABLE]),
8652-
nft_net->base_seq);
8655+
nft_base_seq(net));
86538656
audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1,
86548657
AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC);
86558658
kfree(buf);
@@ -8754,9 +8757,8 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
87548757
struct nft_object *obj, u32 portid, u32 seq, int event,
87558758
u16 flags, int family, int report, gfp_t gfp)
87568759
{
8757-
struct nftables_pernet *nft_net = nft_pernet(net);
87588760
char *buf = kasprintf(gfp, "%s:%u",
8759-
table->name, nft_net->base_seq);
8761+
table->name, nft_base_seq(net));
87608762

87618763
audit_log_nfcfg(buf,
87628764
family,
@@ -9442,7 +9444,7 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
94429444

94439445
nlh = nfnl_msg_put(skb, portid, seq,
94449446
nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
9445-
flags, family, NFNETLINK_V0, nft_base_seq(net));
9447+
flags, family, NFNETLINK_V0, nft_base_seq_be16(net));
94469448
if (!nlh)
94479449
goto nla_put_failure;
94489450

@@ -9511,7 +9513,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb,
95119513

95129514
rcu_read_lock();
95139515
nft_net = nft_pernet(net);
9514-
cb->seq = READ_ONCE(nft_net->base_seq);
9516+
cb->seq = nft_base_seq(net);
95159517

95169518
list_for_each_entry_rcu(table, &nft_net->tables, list) {
95179519
if (family != NFPROTO_UNSPEC && family != table->family)
@@ -9696,17 +9698,16 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
96969698
static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
96979699
u32 portid, u32 seq)
96989700
{
9699-
struct nftables_pernet *nft_net = nft_pernet(net);
97009701
struct nlmsghdr *nlh;
97019702
char buf[TASK_COMM_LEN];
97029703
int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN);
97039704

97049705
nlh = nfnl_msg_put(skb, portid, seq, event, 0, AF_UNSPEC,
9705-
NFNETLINK_V0, nft_base_seq(net));
9706+
NFNETLINK_V0, nft_base_seq_be16(net));
97069707
if (!nlh)
97079708
goto nla_put_failure;
97089709

9709-
if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_net->base_seq)) ||
9710+
if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_base_seq(net))) ||
97109711
nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) ||
97119712
nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current)))
97129713
goto nla_put_failure;
@@ -10968,11 +10969,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
1096810969
* Bump generation counter, invalidate any dump in progress.
1096910970
* Cannot fail after this point.
1097010971
*/
10971-
base_seq = READ_ONCE(nft_net->base_seq);
10972+
base_seq = nft_base_seq(net);
1097210973
while (++base_seq == 0)
1097310974
;
1097410975

10975-
WRITE_ONCE(nft_net->base_seq, base_seq);
10976+
/* pairs with smp_load_acquire in nft_lookup_eval */
10977+
smp_store_release(&net->nft.base_seq, base_seq);
1097610978

1097710979
gc_seq = nft_gc_seq_begin(nft_net);
1097810980

@@ -11181,7 +11183,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
1118111183

1118211184
nft_commit_notify(net, NETLINK_CB(skb).portid);
1118311185
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
11184-
nf_tables_commit_audit_log(&adl, nft_net->base_seq);
11186+
nf_tables_commit_audit_log(&adl, nft_base_seq(net));
1118511187

1118611188
nft_gc_seq_end(nft_net, gc_seq);
1118711189
nft_net->validate_state = NFT_VALIDATE_SKIP;
@@ -11506,7 +11508,7 @@ static bool nf_tables_valid_genid(struct net *net, u32 genid)
1150611508
mutex_lock(&nft_net->commit_mutex);
1150711509
nft_net->tstamp = get_jiffies_64();
1150811510

11509-
genid_ok = genid == 0 || nft_net->base_seq == genid;
11511+
genid_ok = genid == 0 || nft_base_seq(net) == genid;
1151011512
if (!genid_ok)
1151111513
mutex_unlock(&nft_net->commit_mutex);
1151211514

@@ -12143,7 +12145,7 @@ static int __net_init nf_tables_init_net(struct net *net)
1214312145
INIT_LIST_HEAD(&nft_net->module_list);
1214412146
INIT_LIST_HEAD(&nft_net->notify_list);
1214512147
mutex_init(&nft_net->commit_mutex);
12146-
nft_net->base_seq = 1;
12148+
net->nft.base_seq = 1;
1214712149
nft_net->gc_seq = 0;
1214812150
nft_net->validate_state = NFT_VALIDATE_SKIP;
1214912151
INIT_WORK(&nft_net->destroy_work, nf_tables_trans_destroy_work);

net/netfilter/nft_lookup.c

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,11 @@ struct nft_lookup {
2424
struct nft_set_binding binding;
2525
};
2626

27-
#ifdef CONFIG_MITIGATION_RETPOLINE
28-
const struct nft_set_ext *
29-
nft_set_do_lookup(const struct net *net, const struct nft_set *set,
30-
const u32 *key)
27+
static const struct nft_set_ext *
28+
__nft_set_do_lookup(const struct net *net, const struct nft_set *set,
29+
const u32 *key)
3130
{
31+
#ifdef CONFIG_MITIGATION_RETPOLINE
3232
if (set->ops == &nft_set_hash_fast_type.ops)
3333
return nft_hash_lookup_fast(net, set, key);
3434
if (set->ops == &nft_set_hash_type.ops)
@@ -51,10 +51,46 @@ nft_set_do_lookup(const struct net *net, const struct nft_set *set,
5151
return nft_rbtree_lookup(net, set, key);
5252

5353
WARN_ON_ONCE(1);
54+
#endif
5455
return set->ops->lookup(net, set, key);
5556
}
57+
58+
static unsigned int nft_base_seq(const struct net *net)
59+
{
60+
/* pairs with smp_store_release() in nf_tables_commit() */
61+
return smp_load_acquire(&net->nft.base_seq);
62+
}
63+
64+
static bool nft_lookup_should_retry(const struct net *net, unsigned int seq)
65+
{
66+
return unlikely(seq != nft_base_seq(net));
67+
}
68+
69+
const struct nft_set_ext *
70+
nft_set_do_lookup(const struct net *net, const struct nft_set *set,
71+
const u32 *key)
72+
{
73+
const struct nft_set_ext *ext;
74+
unsigned int base_seq;
75+
76+
do {
77+
base_seq = nft_base_seq(net);
78+
79+
ext = __nft_set_do_lookup(net, set, key);
80+
if (ext)
81+
break;
82+
/* No match? There is a small chance that lookup was
83+
* performed in the old generation, but nf_tables_commit()
84+
* already unlinked a (matching) element.
85+
*
86+
* We need to repeat the lookup to make sure that we didn't
87+
* miss a matching element in the new generation.
88+
*/
89+
} while (nft_lookup_should_retry(net, base_seq));
90+
91+
return ext;
92+
}
5693
EXPORT_SYMBOL_GPL(nft_set_do_lookup);
57-
#endif
5894

5995
void nft_lookup_eval(const struct nft_expr *expr,
6096
struct nft_regs *regs,

net/netfilter/nft_set_bitmap.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,8 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx,
226226
const struct nft_bitmap *priv = nft_set_priv(set);
227227
struct nft_bitmap_elem *be;
228228

229-
list_for_each_entry_rcu(be, &priv->list, head) {
229+
list_for_each_entry_rcu(be, &priv->list, head,
230+
lockdep_is_held(&nft_pernet(ctx->net)->commit_mutex)) {
230231
if (iter->count < iter->skip)
231232
goto cont;
232233

0 commit comments

Comments
 (0)