Skip to content

Commit 9c5af2d

Browse files
author
Paolo Abeni
committed
Merge tag 'nf-24-08-15' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf
Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for net:

1) Ignore ifindex for types other than mcast/linklocal in ipv6 frag reasm, from Tom Hughes.

2) Initialize extack for begin/end netlink message marker in batch, from Donald Hunter.

3) Initialize extack for flowtable offload support, also from Donald.

4) Drop packets with cloned unconfirmed conntracks in nfqueue; later it should be possible to explore lookup after reinject, but Florian prefers this approach at this stage. From Florian Westphal.

5) Add selftest for cloned unconfirmed conntracks in nfqueue for previous update.

6) Audit after filling netlink header successfully in object dump, from Phil Sutter.

7-8) Fix concurrent dump and reset, which could result in underflowing counter / quota objects.

netfilter pull request 24-08-15

* tag 'nf-24-08-15' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  netfilter: nf_tables: Add locking for NFT_MSG_GETOBJ_RESET requests
  netfilter: nf_tables: Introduce nf_tables_getobj_single
  netfilter: nf_tables: Audit log dump reset after the fact
  selftests: netfilter: add test for br_netfilter+conntrack+queue combination
  netfilter: nf_queue: drop packets with cloned unconfirmed conntracks
  netfilter: flowtable: initialise extack before use
  netfilter: nfnetlink: Initialise extack before use in ACKs
  netfilter: allow ipv6 fragments to arrive on different devices
====================

Link: https://patch.msgid.link/[email protected]
Signed-off-by: Paolo Abeni <[email protected]>
2 parents 34dfdf2 + bd662c4 commit 9c5af2d

File tree

8 files changed

+228
-50
lines changed

8 files changed

+228
-50
lines changed

net/bridge/br_netfilter_hooks.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -622,8 +622,12 @@ static unsigned int br_nf_local_in(void *priv,
622622
if (likely(nf_ct_is_confirmed(ct)))
623623
return NF_ACCEPT;
624624

625+
if (WARN_ON_ONCE(refcount_read(&nfct->use) != 1)) {
626+
nf_reset_ct(skb);
627+
return NF_ACCEPT;
628+
}
629+
625630
WARN_ON_ONCE(skb_shared(skb));
626-
WARN_ON_ONCE(refcount_read(&nfct->use) != 1);
627631

628632
/* We can't call nf_confirm here, it would create a dependency
629633
* on nf_conntrack module.

net/ipv6/netfilter/nf_conntrack_reasm.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
154154
};
155155
struct inet_frag_queue *q;
156156

157+
if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
158+
IPV6_ADDR_LINKLOCAL)))
159+
key.iif = 0;
160+
157161
q = inet_frag_find(nf_frag->fqdir, &key);
158162
if (!q)
159163
return NULL;

net/netfilter/nf_flow_table_offload.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -841,8 +841,8 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
841841
struct list_head *block_cb_list)
842842
{
843843
struct flow_cls_offload cls_flow = {};
844+
struct netlink_ext_ack extack = {};
844845
struct flow_block_cb *block_cb;
845-
struct netlink_ext_ack extack;
846846
__be16 proto = ETH_P_ALL;
847847
int err, i = 0;
848848

net/netfilter/nf_tables_api.c

Lines changed: 102 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -8020,6 +8020,19 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
80208020
return skb->len;
80218021
}
80228022

8023+
static int nf_tables_dumpreset_obj(struct sk_buff *skb,
8024+
struct netlink_callback *cb)
8025+
{
8026+
struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk));
8027+
int ret;
8028+
8029+
mutex_lock(&nft_net->commit_mutex);
8030+
ret = nf_tables_dump_obj(skb, cb);
8031+
mutex_unlock(&nft_net->commit_mutex);
8032+
8033+
return ret;
8034+
}
8035+
80238036
static int nf_tables_dump_obj_start(struct netlink_callback *cb)
80248037
{
80258038
struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
@@ -8036,12 +8049,18 @@ static int nf_tables_dump_obj_start(struct netlink_callback *cb)
80368049
if (nla[NFTA_OBJ_TYPE])
80378050
ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
80388051

8039-
if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
8040-
ctx->reset = true;
8041-
80428052
return 0;
80438053
}
80448054

8055+
static int nf_tables_dumpreset_obj_start(struct netlink_callback *cb)
8056+
{
8057+
struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
8058+
8059+
ctx->reset = true;
8060+
8061+
return nf_tables_dump_obj_start(cb);
8062+
}
8063+
80458064
static int nf_tables_dump_obj_done(struct netlink_callback *cb)
80468065
{
80478066
struct nft_obj_dump_ctx *ctx = (void *)cb->ctx;
@@ -8052,8 +8071,9 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
80528071
}
80538072

80548073
/* called with rcu_read_lock held */
8055-
static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
8056-
const struct nlattr * const nla[])
8074+
static struct sk_buff *
8075+
nf_tables_getobj_single(u32 portid, const struct nfnl_info *info,
8076+
const struct nlattr * const nla[], bool reset)
80578077
{
80588078
struct netlink_ext_ack *extack = info->extack;
80598079
u8 genmask = nft_genmask_cur(info->net);
@@ -8062,72 +8082,109 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
80628082
struct net *net = info->net;
80638083
struct nft_object *obj;
80648084
struct sk_buff *skb2;
8065-
bool reset = false;
80668085
u32 objtype;
80678086
int err;
80688087

8069-
if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
8070-
struct netlink_dump_control c = {
8071-
.start = nf_tables_dump_obj_start,
8072-
.dump = nf_tables_dump_obj,
8073-
.done = nf_tables_dump_obj_done,
8074-
.module = THIS_MODULE,
8075-
.data = (void *)nla,
8076-
};
8077-
8078-
return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
8079-
}
8080-
80818088
if (!nla[NFTA_OBJ_NAME] ||
80828089
!nla[NFTA_OBJ_TYPE])
8083-
return -EINVAL;
8090+
return ERR_PTR(-EINVAL);
80848091

80858092
table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask, 0);
80868093
if (IS_ERR(table)) {
80878094
NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]);
8088-
return PTR_ERR(table);
8095+
return ERR_CAST(table);
80898096
}
80908097

80918098
objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
80928099
obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask);
80938100
if (IS_ERR(obj)) {
80948101
NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
8095-
return PTR_ERR(obj);
8102+
return ERR_CAST(obj);
80968103
}
80978104

80988105
skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
80998106
if (!skb2)
8100-
return -ENOMEM;
8107+
return ERR_PTR(-ENOMEM);
81018108

8102-
if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET)
8103-
reset = true;
8109+
err = nf_tables_fill_obj_info(skb2, net, portid,
8110+
info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
8111+
family, table, obj, reset);
8112+
if (err < 0) {
8113+
kfree_skb(skb2);
8114+
return ERR_PTR(err);
8115+
}
81048116

8105-
if (reset) {
8106-
const struct nftables_pernet *nft_net;
8107-
char *buf;
8117+
return skb2;
8118+
}
81088119

8109-
nft_net = nft_pernet(net);
8110-
buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq);
8120+
static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
8121+
const struct nlattr * const nla[])
8122+
{
8123+
u32 portid = NETLINK_CB(skb).portid;
8124+
struct sk_buff *skb2;
81118125

8112-
audit_log_nfcfg(buf,
8113-
family,
8114-
1,
8115-
AUDIT_NFT_OP_OBJ_RESET,
8116-
GFP_ATOMIC);
8117-
kfree(buf);
8126+
if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
8127+
struct netlink_dump_control c = {
8128+
.start = nf_tables_dump_obj_start,
8129+
.dump = nf_tables_dump_obj,
8130+
.done = nf_tables_dump_obj_done,
8131+
.module = THIS_MODULE,
8132+
.data = (void *)nla,
8133+
};
8134+
8135+
return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
81188136
}
81198137

8120-
err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid,
8121-
info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0,
8122-
family, table, obj, reset);
8123-
if (err < 0)
8124-
goto err_fill_obj_info;
8138+
skb2 = nf_tables_getobj_single(portid, info, nla, false);
8139+
if (IS_ERR(skb2))
8140+
return PTR_ERR(skb2);
81258141

8126-
return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
8142+
return nfnetlink_unicast(skb2, info->net, portid);
8143+
}
81278144

8128-
err_fill_obj_info:
8129-
kfree_skb(skb2);
8130-
return err;
8145+
static int nf_tables_getobj_reset(struct sk_buff *skb,
8146+
const struct nfnl_info *info,
8147+
const struct nlattr * const nla[])
8148+
{
8149+
struct nftables_pernet *nft_net = nft_pernet(info->net);
8150+
u32 portid = NETLINK_CB(skb).portid;
8151+
struct net *net = info->net;
8152+
struct sk_buff *skb2;
8153+
char *buf;
8154+
8155+
if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
8156+
struct netlink_dump_control c = {
8157+
.start = nf_tables_dumpreset_obj_start,
8158+
.dump = nf_tables_dumpreset_obj,
8159+
.done = nf_tables_dump_obj_done,
8160+
.module = THIS_MODULE,
8161+
.data = (void *)nla,
8162+
};
8163+
8164+
return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
8165+
}
8166+
8167+
if (!try_module_get(THIS_MODULE))
8168+
return -EINVAL;
8169+
rcu_read_unlock();
8170+
mutex_lock(&nft_net->commit_mutex);
8171+
skb2 = nf_tables_getobj_single(portid, info, nla, true);
8172+
mutex_unlock(&nft_net->commit_mutex);
8173+
rcu_read_lock();
8174+
module_put(THIS_MODULE);
8175+
8176+
if (IS_ERR(skb2))
8177+
return PTR_ERR(skb2);
8178+
8179+
buf = kasprintf(GFP_ATOMIC, "%.*s:%u",
8180+
nla_len(nla[NFTA_OBJ_TABLE]),
8181+
(char *)nla_data(nla[NFTA_OBJ_TABLE]),
8182+
nft_net->base_seq);
8183+
audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1,
8184+
AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC);
8185+
kfree(buf);
8186+
8187+
return nfnetlink_unicast(skb2, net, portid);
81318188
}
81328189

81338190
static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
@@ -9410,7 +9467,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
94109467
.policy = nft_obj_policy,
94119468
},
94129469
[NFT_MSG_GETOBJ_RESET] = {
9413-
.call = nf_tables_getobj,
9470+
.call = nf_tables_getobj_reset,
94149471
.type = NFNL_CB_RCU,
94159472
.attr_count = NFTA_OBJ_MAX,
94169473
.policy = nft_obj_policy,

net/netfilter/nfnetlink.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -427,8 +427,10 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
427427

428428
nfnl_unlock(subsys_id);
429429

430-
if (nlh->nlmsg_flags & NLM_F_ACK)
430+
if (nlh->nlmsg_flags & NLM_F_ACK) {
431+
memset(&extack, 0, sizeof(extack));
431432
nfnl_err_add(&err_list, nlh, 0, &extack);
433+
}
432434

433435
while (skb->len >= nlmsg_total_size(0)) {
434436
int msglen, type;
@@ -577,6 +579,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
577579
ss->abort(net, oskb, NFNL_ABORT_NONE);
578580
netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL);
579581
} else if (nlh->nlmsg_flags & NLM_F_ACK) {
582+
memset(&extack, 0, sizeof(extack));
580583
nfnl_err_add(&err_list, nlh, 0, &extack);
581584
}
582585
} else {

net/netfilter/nfnetlink_queue.c

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -820,10 +820,41 @@ static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry)
820820
{
821821
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
822822
static const unsigned long flags = IPS_CONFIRMED | IPS_DYING;
823-
const struct nf_conn *ct = (void *)skb_nfct(entry->skb);
823+
struct nf_conn *ct = (void *)skb_nfct(entry->skb);
824+
unsigned long status;
825+
unsigned int use;
824826

825-
if (ct && ((ct->status & flags) == IPS_DYING))
827+
if (!ct)
828+
return false;
829+
830+
status = READ_ONCE(ct->status);
831+
if ((status & flags) == IPS_DYING)
826832
return true;
833+
834+
if (status & IPS_CONFIRMED)
835+
return false;
836+
837+
/* in some cases skb_clone() can occur after initial conntrack
838+
* pickup, but conntrack assumes exclusive skb->_nfct ownership for
839+
* unconfirmed entries.
840+
*
841+
* This happens for br_netfilter and with ip multicast routing.
842+
* This can't be solved with serialization here because one clone could
843+
* have been queued for local delivery.
844+
*/
845+
use = refcount_read(&ct->ct_general.use);
846+
if (likely(use == 1))
847+
return false;
848+
849+
/* Can't decrement further? Exclusive ownership. */
850+
if (!refcount_dec_not_one(&ct->ct_general.use))
851+
return false;
852+
853+
skb_set_nfct(entry->skb, 0);
854+
/* No nf_ct_put(): we already decremented .use and it cannot
855+
* drop down to 0.
856+
*/
857+
return true;
827858
#endif
828859
return false;
829860
}

tools/testing/selftests/net/netfilter/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
77
MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
88

99
TEST_PROGS := br_netfilter.sh bridge_brouter.sh
10+
TEST_PROGS += br_netfilter_queue.sh
1011
TEST_PROGS += conntrack_icmp_related.sh
1112
TEST_PROGS += conntrack_ipip_mtu.sh
1213
TEST_PROGS += conntrack_tcp_unreplied.sh

tools/testing/selftests/net/netfilter/br_netfilter_queue.sh

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#!/bin/bash
2+
3+
source lib.sh
4+
5+
checktool "nft --version" "run test without nft tool"
6+
7+
cleanup() {
8+
cleanup_all_ns
9+
}
10+
11+
setup_ns c1 c2 c3 sender
12+
13+
trap cleanup EXIT
14+
15+
nf_queue_wait()
16+
{
17+
grep -q "^ *$1 " "/proc/self/net/netfilter/nfnetlink_queue"
18+
}
19+
20+
port_add() {
21+
ns="$1"
22+
dev="$2"
23+
a="$3"
24+
25+
ip link add name "$dev" type veth peer name "$dev" netns "$ns"
26+
27+
ip -net "$ns" addr add 192.168.1."$a"/24 dev "$dev"
28+
ip -net "$ns" link set "$dev" up
29+
30+
ip link set "$dev" master br0
31+
ip link set "$dev" up
32+
}
33+
34+
[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
35+
36+
ip link add br0 type bridge
37+
ip addr add 192.168.1.254/24 dev br0
38+
39+
port_add "$c1" "c1" 1
40+
port_add "$c2" "c2" 2
41+
port_add "$c3" "c3" 3
42+
port_add "$sender" "sender" 253
43+
44+
ip link set br0 up
45+
46+
modprobe -q br_netfilter
47+
48+
sysctl net.bridge.bridge-nf-call-iptables=1 || exit 1
49+
50+
ip netns exec "$sender" ping -I sender -c1 192.168.1.1 || exit 1
51+
ip netns exec "$sender" ping -I sender -c1 192.168.1.2 || exit 2
52+
ip netns exec "$sender" ping -I sender -c1 192.168.1.3 || exit 3
53+
54+
nft -f /dev/stdin <<EOF
55+
table ip filter {
56+
chain forward {
57+
type filter hook forward priority 0; policy accept;
58+
ct state new counter
59+
ip protocol icmp counter queue num 0 bypass
60+
}
61+
}
62+
EOF
63+
./nf_queue -t 5 > /dev/null &
64+
65+
busywait 5000 nf_queue_wait
66+
67+
for i in $(seq 1 5); do conntrack -F > /dev/null 2> /dev/null; sleep 0.1 ; done &
68+
ip netns exec "$sender" ping -I sender -f -c 50 -b 192.168.1.255
69+
70+
read t < /proc/sys/kernel/tainted
71+
if [ "$t" -eq 0 ];then
72+
echo PASS: kernel not tainted
73+
else
74+
echo ERROR: kernel is tainted
75+
exit 1
76+
fi
77+
78+
exit 0

0 commit comments

Comments
 (0)