From 51deadb613fbd4bcd5a58ae60eda5b6d7af862a2 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Fri, 19 Sep 2025 14:31:20 -0400 Subject: [PATCH 1/6] gso: fix udp gso fraglist segmentation after pull from frag_list jira VULN-45766 jira VULN-45767 cve cve-2024-49978 commit-author Willem de Bruijn commit a1e40ac5b5e9077fe1f7ae0eb88034db0f9ae1ab upstream-diff contextual diff is off due to massive reworks. In addition __udpv6_gso_segment_list_csum definition is not included. This was included via "net/gro.h" via 75082e7f4680 which is a bug fix to 4721031c3559 "net: move gro definitions to include/net/gro.h". Since we also do not have that we're just directly including net/ip6_checksum.h to this file. Detect gso fraglist skbs with corrupted geometry (see below) and pass these to skb_segment instead of skb_segment_list, as the first can segment them correctly. Valid SKB_GSO_FRAGLIST skbs - consist of two or more segments - the head_skb holds the protocol headers plus first gso_size - one or more frag_list skbs hold exactly one segment - all but the last must be gso_size Optional datapath hooks such as NAT and BPF (bpf_skb_pull_data) can modify these skbs, breaking these invariants. In extreme cases they pull all data into skb linear. For UDP, this causes a NULL ptr deref in __udpv4_gso_segment_list_csum at udp_hdr(seg->next)->dest. Detect invalid geometry due to pull, by checking head_skb size. Don't just drop, as this may blackhole a destination. Convert to be able to pass to regular skb_segment. Link: https://lore.kernel.org/netdev/20240428142913.18666-1-shiming.cheng@mediatek.com/ Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.") Signed-off-by: Willem de Bruijn Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20241001171752.107580-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski (cherry picked from commit a1e40ac5b5e9077fe1f7ae0eb88034db0f9ae1ab) Signed-off-by: Jonathan Maple --- net/ipv4/udp_offload.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 0ec0dafc39280..89b388d912c93 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include @@ -276,8 +277,26 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, __sum16 check; __be16 newlen; - if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) - return __udp_gso_segment_list(gso_skb, features, is_ipv6); + if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) { + /* Detect modified geometry and pass those to skb_segment. */ + if (skb_pagelen(gso_skb) - sizeof(*uh) == skb_shinfo(gso_skb)->gso_size) + return __udp_gso_segment_list(gso_skb, features, is_ipv6); + + /* Setup csum, as fraglist skips this in udp4_gro_receive. */ + gso_skb->csum_start = skb_transport_header(gso_skb) - gso_skb->head; + gso_skb->csum_offset = offsetof(struct udphdr, check); + gso_skb->ip_summed = CHECKSUM_PARTIAL; + + uh = udp_hdr(gso_skb); + if (is_ipv6) + uh->check = ~udp_v6_check(gso_skb->len, + &ipv6_hdr(gso_skb)->saddr, + &ipv6_hdr(gso_skb)->daddr, 0); + else + uh->check = ~udp_v4_check(gso_skb->len, + ip_hdr(gso_skb)->saddr, + ip_hdr(gso_skb)->daddr, 0); + } mss = skb_shinfo(gso_skb)->gso_size; if (gso_skb->len <= sizeof(*uh) + mss) From 64e3a83a7a78638114641b8dc1a2d3beeb22f96d Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Wed, 24 Sep 2025 16:04:39 -0400 Subject: [PATCH 2/6] vsock: Fix transport_* TOCTOU jira VULN-80682 jira VULN-80681 cve CVE-2025-38461 commit-author Michal Luczaj commit 687aa0c5581b8d4aa87fd92973e4ee576b550cdf Transport assignment may race with module unload. Protect new_transport from becoming a stale pointer. This also takes care of an insecure call in vsock_use_local_transport(); add a lockdep assert. BUG: unable to handle page fault for address: fffffbfff8056000 Oops: Oops: 0000 [#1] SMP KASAN RIP: 0010:vsock_assign_transport+0x366/0x600 Call Trace: vsock_connect+0x59c/0xc40 __sys_connect+0xe8/0x100 __x64_sys_connect+0x6e/0xc0 do_syscall_64+0x92/0x1c0 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") Reviewed-by: Stefano Garzarella Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20250703-vsock-transports-toctou-v4-2-98f0eb530747@rbox.co Signed-off-by: Jakub Kicinski (cherry picked from commit 687aa0c5581b8d4aa87fd92973e4ee576b550cdf) Signed-off-by: Jonathan Maple --- net/vmw_vsock/af_vsock.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index dbc23890fa721..8bfe961b434d0 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -398,6 +398,8 @@ EXPORT_SYMBOL_GPL(vsock_enqueue_accept); static bool vsock_use_local_transport(unsigned int remote_cid) { + lockdep_assert_held(&vsock_register_mutex); + if (!transport_local) return false; @@ -455,6 +457,8 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) remote_flags = vsk->remote_addr.svm_flags; + mutex_lock(&vsock_register_mutex); + switch (sk->sk_type) { case SOCK_DGRAM: new_transport = transport_dgram; @@ -469,12 +473,15 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) new_transport = transport_h2g; break; default: - return -ESOCKTNOSUPPORT; + ret = -ESOCKTNOSUPPORT; + goto err; } if (vsk->transport) { - if (vsk->transport == new_transport) - return 0; + if (vsk->transport == new_transport) { + ret = 0; + goto err; + } /* transport->release() must be called with sock lock acquired. * This path can only be taken during vsock_stream_connect(), @@ -489,8 +496,16 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) /* We increase the module refcnt to prevent the transport unloading * while there are open sockets assigned to it. */ - if (!new_transport || !try_module_get(new_transport->module)) - return -ENODEV; + if (!new_transport || !try_module_get(new_transport->module)) { + ret = -ENODEV; + goto err; + } + + /* It's safe to release the mutex after a successful try_module_get(). + * Whichever transport `new_transport` points at, it won't go away until + * the last module_put() below or in vsock_deassign_transport(). + */ + mutex_unlock(&vsock_register_mutex); ret = new_transport->init(vsk, psk); if (ret) { @@ -501,6 +516,9 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) vsk->transport = new_transport; return 0; +err: + mutex_unlock(&vsock_register_mutex); + return ret; } EXPORT_SYMBOL_GPL(vsock_assign_transport); From 07a50aedff303e19b11c2cce01f28035c62407a7 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Wed, 24 Sep 2025 16:04:47 -0400 Subject: [PATCH 3/6] do_change_type(): refuse to operate on unmounted/not ours mounts jira VULN-98607 jira VULN-98606 cve CVE-2025-38498 commit-author Al Viro commit 12f147ddd6de7382dad54812e65f3f08d05809fc Ensure that propagation settings can only be changed for mounts located in the caller's mount namespace. This change aligns permission checking with the rest of mount(2). Reviewed-by: Christian Brauner Fixes: 07b20889e305 ("beginning of the shared-subtree proper") Reported-by: "Orlando, Noah" Signed-off-by: Al Viro (cherry picked from commit 12f147ddd6de7382dad54812e65f3f08d05809fc) Signed-off-by: Jonathan Maple --- fs/namespace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/namespace.c b/fs/namespace.c index 487417f606c93..8d5d50e2d2ff9 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2290,6 +2290,10 @@ static int do_change_type(struct path *path, int ms_flags) return -EINVAL; namespace_lock(); + if (!check_mnt(mnt)) { + err = -EINVAL; + goto out_unlock; + } if (type == MS_SHARED) { err = invent_group_ids(mnt, recurse); if (err) From 70d4b79763c3a31ef086f9e4154c9f0321a24c9a Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Wed, 24 Sep 2025 16:11:22 -0400 Subject: [PATCH 4/6] bpf: Fix a segment issue when downgrading gso_size jira VULN-38750 jira VULN-38751 cve CVE-2024-42281 commit-author Fred Li commit fa5ef655615a01533035c6139248c5b33aa27028 Linearize the skb when downgrading gso_size because it may trigger a BUG_ON() later when the skb is segmented as described in [1,2]. Fixes: 2be7e212d5419 ("bpf: add bpf_skb_adjust_room helper") Signed-off-by: Fred Li Signed-off-by: Daniel Borkmann Reviewed-by: Willem de Bruijn Acked-by: Daniel Borkmann Link: https://lore.kernel.org/all/20240626065555.35460-2-dracodingfly@gmail.com [1] Link: https://lore.kernel.org/all/668d5cf1ec330_1c18c32947@willemb.c.googlers.com.notmuch [2] Link: https://lore.kernel.org/bpf/20240719024653.77006-1-dracodingfly@gmail.com (cherry picked from commit fa5ef655615a01533035c6139248c5b33aa27028) Signed-off-by: Jonathan Maple --- net/core/filter.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 06c6ff7fb511b..2bffec483f26e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3497,13 +3497,20 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, if (skb_is_gso(skb)) { struct skb_shared_info *shinfo = skb_shinfo(skb); - /* Due to header grow, MSS needs to be downgraded. */ - if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) - skb_decrease_gso_size(shinfo, len_diff); - /* Header must be checked, and gso_segs recomputed. */ shinfo->gso_type |= gso_type; shinfo->gso_segs = 0; + + /* Due to header growth, MSS needs to be downgraded. + * There is a BUG_ON() when segmenting the frag_list with + * head_frag true, so linearize the skb after downgrading + * the MSS. + */ + if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) { + skb_decrease_gso_size(shinfo, len_diff); + if (shinfo->frag_list) + return skb_linearize(skb); + } } return 0; From 6436952e059edea9a8b90758e24e2cfd766f6930 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Thu, 25 Sep 2025 13:36:44 -0400 Subject: [PATCH 5/6] net: fix udp gso skb_segment after pull from frag_list jira VULN-156444 jira VULN-156445 cve CVE-2025-38124 commit-author Shiming Cheng commit 3382a1ed7f778db841063f5d7e317ac55f9e7f72 Commit a1e40ac5b5e9 ("net: gso: fix udp gso fraglist segmentation after pull from frag_list") detected invalid geometry in frag_list skbs and redirects them from skb_segment_list to more robust skb_segment. But some packets with modified geometry can also hit bugs in that code. We don't know how many such cases exist. Addressing each one by one also requires touching the complex skb_segment code, which risks introducing bugs for other types of skbs. Instead, linearize all these packets that fail the basic invariants on gso fraglist skbs. That is more robust. If only part of the fraglist payload is pulled into head_skb, it will always cause exception when splitting skbs by skb_segment. For detailed call stack information, see below. Valid SKB_GSO_FRAGLIST skbs - consist of two or more segments - the head_skb holds the protocol headers plus first gso_size - one or more frag_list skbs hold exactly one segment - all but the last must be gso_size Optional datapath hooks such as NAT and BPF (bpf_skb_pull_data) can modify fraglist skbs, breaking these invariants. In extreme cases they pull one part of data into skb linear. For UDP, this causes three payloads with lengths of (11,11,10) bytes were pulled tail to become (12,10,10) bytes. The skbs no longer meets the above SKB_GSO_FRAGLIST conditions because payload was pulled into head_skb, it needs to be linearized before pass to regular skb_segment. skb_segment+0xcd0/0xd14 __udp_gso_segment+0x334/0x5f4 udp4_ufo_fragment+0x118/0x15c inet_gso_segment+0x164/0x338 skb_mac_gso_segment+0xc4/0x13c __skb_gso_segment+0xc4/0x124 validate_xmit_skb+0x9c/0x2c0 validate_xmit_skb_list+0x4c/0x80 sch_direct_xmit+0x70/0x404 __dev_queue_xmit+0x64c/0xe5c neigh_resolve_output+0x178/0x1c4 ip_finish_output2+0x37c/0x47c __ip_finish_output+0x194/0x240 ip_finish_output+0x20/0xf4 ip_output+0x100/0x1a0 NF_HOOK+0xc4/0x16c ip_forward+0x314/0x32c ip_rcv+0x90/0x118 __netif_receive_skb+0x74/0x124 process_backlog+0xe8/0x1a4 __napi_poll+0x5c/0x1f8 net_rx_action+0x154/0x314 handle_softirqs+0x154/0x4b8 [118.376811] [C201134] rxq0_pus: [name:bug&]kernel BUG at net/core/skbuff.c:4278! [118.376829] [C201134] rxq0_pus: [name:traps&]Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP [118.470774] [C201134] rxq0_pus: [name:mrdump&]Kernel Offset: 0x178cc00000 from 0xffffffc008000000 [118.470810] [C201134] rxq0_pus: [name:mrdump&]PHYS_OFFSET: 0x40000000 [118.470827] [C201134] rxq0_pus: [name:mrdump&]pstate: 60400005 (nZCv daif +PAN -UAO) [118.470848] [C201134] rxq0_pus: [name:mrdump&]pc : [0xffffffd79598aefc] skb_segment+0xcd0/0xd14 [118.470900] [C201134] rxq0_pus: [name:mrdump&]lr : [0xffffffd79598a5e8] skb_segment+0x3bc/0xd14 [118.470928] [C201134] rxq0_pus: [name:mrdump&]sp : ffffffc008013770 Fixes: a1e40ac5b5e9 ("gso: fix udp gso fraglist segmentation after pull from frag_list") Signed-off-by: Shiming Cheng Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller (cherry picked from commit 3382a1ed7f778db841063f5d7e317ac55f9e7f72) Signed-off-by: Jonathan Maple --- net/ipv4/udp_offload.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 89b388d912c93..8f1464cf3a378 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -276,12 +276,17 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, bool copy_dtor; __sum16 check; __be16 newlen; + int ret = 0; if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) { /* Detect modified geometry and pass those to skb_segment. */ if (skb_pagelen(gso_skb) - sizeof(*uh) == skb_shinfo(gso_skb)->gso_size) return __udp_gso_segment_list(gso_skb, features, is_ipv6); + ret = __skb_linearize(gso_skb); + if (ret) + return ERR_PTR(ret); + /* Setup csum, as fraglist skips this in udp4_gro_receive. */ gso_skb->csum_start = skb_transport_header(gso_skb) - gso_skb->head; gso_skb->csum_offset = offsetof(struct udphdr, check); From 106adb1d0a8fc3fb9f0e12e5522a69458b879734 Mon Sep 17 00:00:00 2001 From: Jonathan Maple Date: Thu, 25 Sep 2025 14:35:31 -0400 Subject: [PATCH 6/6] use uniform permission checks for all mount propagation changes jira VULN-98606 jira VULN-98607 cve-bf CVE-2025-38498 commit-author Al Viro commit cffd0441872e7f6b1fce5e78fb1c99187a291330 upstream-diff This is a partial backport due to missing 9ffb14ef61ba which introduces do_set_group(). This is where the below operation on diffferent aspects of the same thing is introduced. This change does however introduce some additional safety checks which should be considered. do_change_type() and do_set_group() are operating on different aspects of the same thing - propagation graph. The latter asks for mounts involved to be mounted in namespace(s) the caller has CAP_SYS_ADMIN for. The former is a mess - originally it didn't even check that mount *is* mounted. That got fixed, but the resulting check turns out to be too strict for userland - in effect, we check that mount is in our namespace, having already checked that we have CAP_SYS_ADMIN there. What we really need (in both cases) is * only touch mounts that are mounted. That's a must-have constraint - data corruption happens if it get violated. * don't allow to mess with a namespace unless you already have enough permissions to do so (i.e. CAP_SYS_ADMIN in its userns). That's an equivalent of what do_set_group() does; let's extract that into a helper (may_change_propagation()) and use it in both do_set_group() and do_change_type(). Fixes: 12f147ddd6de "do_change_type(): refuse to operate on unmounted/not ours mounts" Acked-by: Andrei Vagin Reviewed-by: Pavel Tikhomirov Tested-by: Pavel Tikhomirov Reviewed-by: Christian Brauner Signed-off-by: Al Viro (cherry picked from commit cffd0441872e7f6b1fce5e78fb1c99187a291330) Signed-off-by: Jonathan Maple --- fs/namespace.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 8d5d50e2d2ff9..57e5f4b63d6b8 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2254,6 +2254,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) return attach_recursive_mnt(mnt, p, mp, NULL); } +static int may_change_propagation(const struct mount *m) +{ + struct mnt_namespace *ns = m->mnt_ns; + + // it must be mounted in some namespace + if (IS_ERR_OR_NULL(ns)) // is_mounted() + return -EINVAL; + // and the caller must be admin in userns of that namespace + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + return 0; +} + /* * Sanity check the flags to change_mnt_propagation. */ @@ -2290,10 +2303,10 @@ static int do_change_type(struct path *path, int ms_flags) return -EINVAL; namespace_lock(); - if (!check_mnt(mnt)) { - err = -EINVAL; + err = may_change_propagation(mnt); + if (err) goto out_unlock; - } + if (type == MS_SHARED) { err = invent_group_ids(mnt, recurse); if (err)