diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 6dbdf60b342f6..1426467df547a 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -93,6 +93,8 @@ static inline struct dst_entry *ip6_route_output(struct net *net,
 	return ip6_route_output_flags(net, sk, fl6, 0);
 }
 
+int ip6_route_reply_fetch_dst(struct sk_buff *skb);
+
 /* Only conditionally release dst if flags indicates
  * !RT6_LOOKUP_F_DST_NOREF or dst is in uncached_list.
  */
diff --git a/include/net/route.h b/include/net/route.h
index 8e39aa822cf98..1f032f768d526 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -173,6 +173,7 @@ struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
 				    const struct sock *sk);
 struct dst_entry *ipv4_blackhole_route(struct net *net,
 				       struct dst_entry *dst_orig);
+int ip_route_reply_fetch_dst(struct sk_buff *skb);
 
 static inline struct rtable *ip_route_output_key(struct net *net, struct flowi4 *flp)
 {
diff --git a/net/core/filter.c b/net/core/filter.c
index 7a72f766aacfa..050872324575e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -85,6 +85,10 @@
 #include
 #include
 #include
+#include
+#include
+#include
+#include
 
 #include "dev.h"
 
@@ -12148,6 +12152,72 @@ __bpf_kfunc int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops,
 	return 0;
 }
 
+/**
+ * bpf_icmp_send_unreach - answer a packet with ICMP destination unreachable
+ * @__skb: packet to answer; only a clone of it is modified and consumed
+ * @code: ICMP code, 0..NR_ICMP_UNREACH (IPv4) or 0..ICMPV6_REJECT_ROUTE (IPv6)
+ *
+ * Clones the packet, attaches a route towards its source address to the
+ * clone and passes the clone to icmp_send() or icmpv6_send().
+ *
+ * Return: SK_DROP on success, -EINVAL if @code is out of range, -ENOMEM if
+ * the clone cannot be allocated, -EHOSTUNREACH if the route lookup fails and
+ * -EPROTONOSUPPORT if skb->protocol is neither IPv4 nor IPv6 (or IPv6 is
+ * compiled out).
+ */
+__bpf_kfunc int bpf_icmp_send_unreach(struct __sk_buff *__skb, int code)
+{
+	struct sk_buff *skb = (struct sk_buff *)__skb;
+	struct sk_buff *nskb;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		if (code < 0 || code > NR_ICMP_UNREACH)
+			return -EINVAL;
+
+		nskb = skb_clone(skb, GFP_ATOMIC);
+		if (!nskb)
+			return -ENOMEM;
+
+		/* skb_clone() copied the original dst and skb_dst_set()
+		 * does not release the one it overwrites, so drop it first.
+		 */
+		skb_dst_drop(nskb);
+		if (ip_route_reply_fetch_dst(nskb) < 0) {
+			kfree_skb(nskb);
+			return -EHOSTUNREACH;
+		}
+
+		icmp_send(nskb, ICMP_DEST_UNREACH, code, 0);
+		kfree_skb(nskb);
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case htons(ETH_P_IPV6):
+		if (code < 0 || code > ICMPV6_REJECT_ROUTE)
+			return -EINVAL;
+
+		nskb = skb_clone(skb, GFP_ATOMIC);
+		if (!nskb)
+			return -ENOMEM;
+
+		/* Same as IPv4: release the dst inherited from the clone. */
+		skb_dst_drop(nskb);
+		if (ip6_route_reply_fetch_dst(nskb) < 0) {
+			kfree_skb(nskb);
+			return -EHOSTUNREACH;
+		}
+
+		icmpv6_send(nskb, ICMPV6_DEST_UNREACH, code, 0);
+		kfree_skb(nskb);
+		break;
+#endif
+	default:
+		return -EPROTONOSUPPORT;
+	}
+
+	return SK_DROP;
+}
+
 __bpf_kfunc_end_defs();
 
 int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
@@ -12185,6 +12255,10 @@ BTF_KFUNCS_START(bpf_kfunc_check_set_sock_ops)
 BTF_ID_FLAGS(func, bpf_sock_ops_enable_tx_tstamp, KF_TRUSTED_ARGS)
 BTF_KFUNCS_END(bpf_kfunc_check_set_sock_ops)
 
+BTF_KFUNCS_START(bpf_kfunc_check_set_icmp_send_unreach)
+BTF_ID_FLAGS(func, bpf_icmp_send_unreach, KF_TRUSTED_ARGS)
+BTF_KFUNCS_END(bpf_kfunc_check_set_icmp_send_unreach)
+
 static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
 	.owner = THIS_MODULE,
 	.set = &bpf_kfunc_check_set_skb,
@@ -12210,6 +12284,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_sock_ops = {
 	.set = &bpf_kfunc_check_set_sock_ops,
 };
 
+static const struct btf_kfunc_id_set bpf_kfunc_set_icmp_send_unreach = {
+	.owner = THIS_MODULE,
+	.set = &bpf_kfunc_check_set_icmp_send_unreach,
+};
+
 static int __init bpf_kfunc_init(void)
 {
 	int ret;
@@ -12229,6 +12308,7 @@ static int __init bpf_kfunc_init(void)
 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
 					       &bpf_kfunc_set_sock_addr);
 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_tcp_reqsk);
+	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &bpf_kfunc_set_icmp_send_unreach);
 	return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SOCK_OPS, &bpf_kfunc_set_sock_ops);
 }
 late_initcall(bpf_kfunc_init);
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 87fd945a0d27a..76beb78f556a8 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -220,21 +220,6 @@ void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb,
 }
 EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put);
 
-static int nf_reject_fill_skb_dst(struct sk_buff *skb_in)
-{
-	struct dst_entry *dst = NULL;
-	struct flowi fl;
-
-	memset(&fl, 0, sizeof(struct flowi));
-	fl.u.ip4.daddr = ip_hdr(skb_in)->saddr;
-	nf_ip_route(dev_net(skb_in->dev), &dst, &fl, false);
-	if (!dst)
-		return -1;
-
-	skb_dst_set(skb_in, dst);
-	return 0;
-}
-
 /* Send RST reply */
 void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb,
 		   int hook)
@@ -248,7 +233,7 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb,
 		return;
 
 	if ((hook == NF_INET_PRE_ROUTING || hook == NF_INET_INGRESS) &&
-	    nf_reject_fill_skb_dst(oldskb) < 0)
+	    ip_route_reply_fetch_dst(oldskb) < 0)
 		return;
 
 	if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -322,7 +307,7 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
 		return;
 
 	if ((hook == NF_INET_PRE_ROUTING || hook == NF_INET_INGRESS) &&
-	    nf_reject_fill_skb_dst(skb_in) < 0)
+	    ip_route_reply_fetch_dst(skb_in) < 0)
 		return;
 
 	if (skb_csum_unnecessary(skb_in) ||
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index fccb05fb3a794..59b8fc3c01c05 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2934,6 +2934,30 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
 }
 EXPORT_SYMBOL_GPL(ip_route_output_flow);
 
+/**
+ * ip_route_reply_fetch_dst - attach a route towards the sender to @skb
+ * @skb: IPv4 packet; its source address is used as the lookup destination
+ *
+ * The dst is installed with skb_dst_set(), which does not release a dst
+ * already attached to @skb, so callers must pass an skb without one.
+ *
+ * Return: 0 on success, the PTR_ERR() of the failed route lookup otherwise.
+ */
+int ip_route_reply_fetch_dst(struct sk_buff *skb)
+{
+	struct rtable *rt;
+	struct flowi4 fl4 = {
+		.daddr = ip_hdr(skb)->saddr
+	};
+
+	rt = ip_route_output_key(dev_net(skb->dev), &fl4);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
+	skb_dst_set(skb, &rt->dst);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ip_route_reply_fetch_dst);
+
 /* called with rcu_read_lock held */
 static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
 			struct rtable *rt, u32 table_id, dscp_t dscp,
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 9ae2b2725bf99..994a3b88ac525 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -250,21 +250,6 @@ void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb,
 }
 EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_put);
 
-static int nf_reject6_fill_skb_dst(struct sk_buff *skb_in)
-{
-	struct dst_entry *dst = NULL;
-	struct flowi fl;
-
-	memset(&fl, 0, sizeof(struct flowi));
-	fl.u.ip6.daddr = ipv6_hdr(skb_in)->saddr;
-	nf_ip6_route(dev_net(skb_in->dev), &dst, &fl, false);
-	if (!dst)
-		return -1;
-
-	skb_dst_set(skb_in, dst);
-	return 0;
-}
-
 void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
 		    int hook)
 {
@@ -398,7 +383,7 @@ void nf_send_unreach6(struct net *net, struct sk_buff *skb_in,
 		skb_in->dev = net->loopback_dev;
 
 	if ((hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_INGRESS) &&
-	    nf_reject6_fill_skb_dst(skb_in) < 0)
+	    ip6_route_reply_fetch_dst(skb_in) < 0)
 		return;
 
 	icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0d5464c649652..de61540f9524d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2705,6 +2705,34 @@ struct dst_entry *ip6_route_output_flags(struct net *net,
 }
 EXPORT_SYMBOL_GPL(ip6_route_output_flags);
 
+/**
+ * ip6_route_reply_fetch_dst - attach a route towards the sender to @skb
+ * @skb: IPv6 packet; its source address is used as the lookup destination
+ *
+ * The dst is installed with skb_dst_set(), which does not release a dst
+ * already attached to @skb, so callers must pass an skb without one.
+ *
+ * Return: 0 on success, otherwise the error stored in the looked-up dst
+ * (which is released before returning).
+ */
+int ip6_route_reply_fetch_dst(struct sk_buff *skb)
+{
+	struct dst_entry *result;
+	struct flowi6 fl = {
+		.daddr = ipv6_hdr(skb)->saddr
+	};
+	int err;
+
+	result = ip6_route_output(dev_net(skb->dev), NULL, &fl);
+	err = result->error;
+	if (err)
+		dst_release(result);
+	else
+		skb_dst_set(skb, result);
+	return err;
+}
+EXPORT_SYMBOL_GPL(ip6_route_reply_fetch_dst);
+
 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 {
 	struct rt6_info *rt, *ort = dst_rt6_info(dst_orig);
diff --git a/tools/testing/selftests/bpf/prog_tests/icmp_send_unreach_kfunc.c b/tools/testing/selftests/bpf/prog_tests/icmp_send_unreach_kfunc.c
new file mode 100644
index 0000000000000..414c1ed8ced39
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/icmp_send_unreach_kfunc.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+#include
+#include
+#include
+#include "icmp_send_unreach.skel.h"
+
+#define TIMEOUT_MS 1000
+#define SRV_PORT 54321
+
+#define ICMP_DEST_UNREACH 3
+
+#define ICMP_FRAG_NEEDED 4
+#define NR_ICMP_UNREACH 15
+
+/* Check that the socket error queue holds the expected ICMP unreach code. */
+static void read_icmp_errqueue(int sockfd, int expected_code)
+{
+	ssize_t n;
+	struct sock_extended_err *sock_err;
+	struct cmsghdr *cm;
+	char ctrl_buf[512];
+	struct msghdr msg = {
+		.msg_control = ctrl_buf,
+		.msg_controllen = sizeof(ctrl_buf),
+	};
+
+	n = recvmsg(sockfd, &msg, MSG_ERRQUEUE);
+	if (!ASSERT_GE(n, 0, "recvmsg_errqueue"))
+		return;
+
+	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+		if (!ASSERT_EQ(cm->cmsg_level, IPPROTO_IP, "cmsg_level") ||
+		    !ASSERT_EQ(cm->cmsg_type, IP_RECVERR, "cmsg_type"))
+			continue;
+
+		sock_err = (struct sock_extended_err *)CMSG_DATA(cm);
+
+		if (!ASSERT_EQ(sock_err->ee_origin, SO_EE_ORIGIN_ICMP,
+			       "sock_err_origin_icmp"))
+			return;
+		if (!ASSERT_EQ(sock_err->ee_type, ICMP_DEST_UNREACH,
+			       "sock_err_type_dest_unreach"))
+			return;
+		ASSERT_EQ(sock_err->ee_code, expected_code, "sock_err_code");
+	}
+}
+
+/* For each unreach code: connect through the egress prog, check the ICMP. */
+void test_icmp_send_unreach_kfunc(void)
+{
+	struct icmp_send_unreach *skel;
+	int cgroup_fd = -1, client_fd = -1, srv_fd = -1;
+	int *code;
+
+	skel = icmp_send_unreach__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		goto cleanup;
+
+	cgroup_fd = test__join_cgroup("/icmp_send_unreach_cgroup");
+	if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
+		goto cleanup;
+
+	skel->links.egress =
+		bpf_program__attach_cgroup(skel->progs.egress, cgroup_fd);
+	if (!ASSERT_OK_PTR(skel->links.egress, "prog_attach_cgroup"))
+		goto cleanup;
+
+	code = &skel->bss->unreach_code;
+
+	for (*code = 0; *code <= NR_ICMP_UNREACH; (*code)++) {
+		// The TCP stack reacts differently when asking for
+		// fragmentation, let's ignore it for now
+		if (*code == ICMP_FRAG_NEEDED)
+			continue;
+
+		skel->bss->kfunc_ret = -1;
+
+		srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1",
+				      SRV_PORT, TIMEOUT_MS);
+		if (!ASSERT_GE(srv_fd, 0, "start_server"))
+			goto for_cleanup;
+
+		/* connect_to_fd() returns its own socket, no socket() needed */
+		client_fd = connect_to_fd(srv_fd, 0);
+		if (!ASSERT_GE(client_fd, 0, "client_connect"))
+			goto for_cleanup;
+
+		read_icmp_errqueue(client_fd, *code);
+
+		ASSERT_EQ(skel->bss->kfunc_ret, SK_DROP, "kfunc_ret");
+for_cleanup:
+		close(client_fd);
+		close(srv_fd);
+	}
+
+cleanup:
+	icmp_send_unreach__destroy(skel);
+	close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/icmp_send_unreach.c b/tools/testing/selftests/bpf/progs/icmp_send_unreach.c
new file mode 100644
index 0000000000000..15783e5d1d65f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/icmp_send_unreach.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
+
+int unreach_code = 0;
+int kfunc_ret = 0;
+
+#define SERVER_PORT 54321
+#define SERVER_IP 0x7F000001
+
+/* For IPv4 TCP packets sent to SERVER_IP:SERVER_PORT, call
+ * bpf_icmp_send_unreach() with unreach_code and store its return value in
+ * kfunc_ret. Every packet is let through, matching or not.
+ */
+SEC("cgroup_skb/egress")
+int egress(struct __sk_buff *skb)
+{
+	void *data = (void *)(long)skb->data;
+	void *data_end = (void *)(long)skb->data_end;
+	struct iphdr *iph;
+	struct tcphdr *tcph;
+
+	iph = data;
+	if ((void *)(iph + 1) > data_end || iph->version != 4 ||
+	    iph->protocol != IPPROTO_TCP || iph->daddr != bpf_htonl(SERVER_IP))
+		return SK_PASS;
+
+	tcph = (void *)iph + iph->ihl * 4;
+	if ((void *)(tcph + 1) > data_end ||
+	    tcph->dest != bpf_htons(SERVER_PORT))
+		return SK_PASS;
+
+	kfunc_ret = bpf_icmp_send_unreach(skb, unreach_code);
+
+	/* returns SK_PASS to execute the test case quicker */
+	return SK_PASS;
+}