From 51deadb613fbd4bcd5a58ae60eda5b6d7af862a2 Mon Sep 17 00:00:00 2001
From: Jonathan Maple <jmaple@ciq.com>
Date: Fri, 19 Sep 2025 14:31:20 -0400
Subject: [PATCH 1/6] gso: fix udp gso fraglist segmentation after pull from
 frag_list

jira VULN-45766
jira VULN-45767
cve cve-2024-49978
commit-author Willem de Bruijn <willemb@google.com>
commit a1e40ac5b5e9077fe1f7ae0eb88034db0f9ae1ab
upstream-diff contextual diff is off due to massive reworks.
	In addition __udpv6_gso_segment_list_csum definition is not
	included.  This was included via "net/gro.h" via 75082e7f4680
	which is a bug fix to 4721031c3559 "net: move gro definitions to
	include/net/gro.h". Since we also do not have that we're just
	directly including net/ip6_checksum.h to this file.

Detect gso fraglist skbs with corrupted geometry (see below) and
pass these to skb_segment instead of skb_segment_list, as the first
can segment them correctly.

Valid SKB_GSO_FRAGLIST skbs
- consist of two or more segments
- the head_skb holds the protocol headers plus first gso_size
- one or more frag_list skbs hold exactly one segment
- all but the last must be gso_size

Optional datapath hooks such as NAT and BPF (bpf_skb_pull_data) can
modify these skbs, breaking these invariants.

In extreme cases they pull all data into skb linear. For UDP, this
causes a NULL ptr deref in __udpv4_gso_segment_list_csum at
udp_hdr(seg->next)->dest.

Detect invalid geometry due to pull, by checking head_skb size.
Don't just drop, as this may blackhole a destination. Convert to be
able to pass to regular skb_segment.

Link: https://lore.kernel.org/netdev/20240428142913.18666-1-shiming.cheng@mediatek.com/
Fixes: 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.")
	Signed-off-by: Willem de Bruijn <willemb@google.com>
	Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20241001171752.107580-1-willemdebruijn.kernel@gmail.com
	Signed-off-by: Jakub Kicinski <kuba@kernel.org>
(cherry picked from commit a1e40ac5b5e9077fe1f7ae0eb88034db0f9ae1ab)
	Signed-off-by: Jonathan Maple <jmaple@ciq.com>
---
 net/ipv4/udp_offload.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 0ec0dafc39280..89b388d912c93 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -11,6 +11,7 @@
  */
 
 #include <linux/skbuff.h>
+#include <net/ip6_checksum.h>
 #include <net/udp.h>
 #include <net/protocol.h>
 #include <net/inet_common.h>
@@ -276,8 +277,26 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
 	__sum16 check;
 	__be16 newlen;
 
-	if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
-		return __udp_gso_segment_list(gso_skb, features, is_ipv6);
+	if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) {
+		 /* Detect modified geometry and pass those to skb_segment. */
+		if (skb_pagelen(gso_skb) - sizeof(*uh) == skb_shinfo(gso_skb)->gso_size)
+			return __udp_gso_segment_list(gso_skb, features, is_ipv6);
+
+		 /* Setup csum, as fraglist skips this in udp4_gro_receive. */
+		gso_skb->csum_start = skb_transport_header(gso_skb) - gso_skb->head;
+		gso_skb->csum_offset = offsetof(struct udphdr, check);
+		gso_skb->ip_summed = CHECKSUM_PARTIAL;
+
+		uh = udp_hdr(gso_skb);
+		if (is_ipv6)
+			uh->check = ~udp_v6_check(gso_skb->len,
+						  &ipv6_hdr(gso_skb)->saddr,
+						  &ipv6_hdr(gso_skb)->daddr, 0);
+		else
+			uh->check = ~udp_v4_check(gso_skb->len,
+						  ip_hdr(gso_skb)->saddr,
+						  ip_hdr(gso_skb)->daddr, 0);
+	}
 
 	mss = skb_shinfo(gso_skb)->gso_size;
 	if (gso_skb->len <= sizeof(*uh) + mss)

From 64e3a83a7a78638114641b8dc1a2d3beeb22f96d Mon Sep 17 00:00:00 2001
From: Jonathan Maple <jmaple@ciq.com>
Date: Wed, 24 Sep 2025 16:04:39 -0400
Subject: [PATCH 2/6] vsock: Fix transport_* TOCTOU

jira VULN-80682
jira VULN-80681
cve CVE-2025-38461
commit-author Michal Luczaj <mhal@rbox.co>
commit 687aa0c5581b8d4aa87fd92973e4ee576b550cdf

Transport assignment may race with module unload. Protect new_transport
from becoming a stale pointer.

This also takes care of an insecure call in vsock_use_local_transport();
add a lockdep assert.

BUG: unable to handle page fault for address: fffffbfff8056000
Oops: Oops: 0000 [#1] SMP KASAN
RIP: 0010:vsock_assign_transport+0x366/0x600
Call Trace:
 vsock_connect+0x59c/0xc40
 __sys_connect+0xe8/0x100
 __x64_sys_connect+0x6e/0xc0
 do_syscall_64+0x92/0x1c0
 entry_SYSCALL_64_after_hwframe+0x4b/0x53

Fixes: c0cfa2d8a788 ("vsock: add multi-transports support")
	Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
	Signed-off-by: Michal Luczaj <mhal@rbox.co>
Link: https://patch.msgid.link/20250703-vsock-transports-toctou-v4-2-98f0eb530747@rbox.co
	Signed-off-by: Jakub Kicinski <kuba@kernel.org>
(cherry picked from commit 687aa0c5581b8d4aa87fd92973e4ee576b550cdf)
	Signed-off-by: Jonathan Maple <jmaple@ciq.com>
---
 net/vmw_vsock/af_vsock.c | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index dbc23890fa721..8bfe961b434d0 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -398,6 +398,8 @@ EXPORT_SYMBOL_GPL(vsock_enqueue_accept);
 
 static bool vsock_use_local_transport(unsigned int remote_cid)
 {
+	lockdep_assert_held(&vsock_register_mutex);
+
 	if (!transport_local)
 		return false;
 
@@ -455,6 +457,8 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
 
 	remote_flags = vsk->remote_addr.svm_flags;
 
+	mutex_lock(&vsock_register_mutex);
+
 	switch (sk->sk_type) {
 	case SOCK_DGRAM:
 		new_transport = transport_dgram;
@@ -469,12 +473,15 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
 			new_transport = transport_h2g;
 		break;
 	default:
-		return -ESOCKTNOSUPPORT;
+		ret = -ESOCKTNOSUPPORT;
+		goto err;
 	}
 
 	if (vsk->transport) {
-		if (vsk->transport == new_transport)
-			return 0;
+		if (vsk->transport == new_transport) {
+			ret = 0;
+			goto err;
+		}
 
 		/* transport->release() must be called with sock lock acquired.
 		 * This path can only be taken during vsock_stream_connect(),
@@ -489,8 +496,16 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
 	/* We increase the module refcnt to prevent the transport unloading
 	 * while there are open sockets assigned to it.
 	 */
-	if (!new_transport || !try_module_get(new_transport->module))
-		return -ENODEV;
+	if (!new_transport || !try_module_get(new_transport->module)) {
+		ret = -ENODEV;
+		goto err;
+	}
+
+	/* It's safe to release the mutex after a successful try_module_get().
+	 * Whichever transport `new_transport` points at, it won't go away until
+	 * the last module_put() below or in vsock_deassign_transport().
+	 */
+	mutex_unlock(&vsock_register_mutex);
 
 	ret = new_transport->init(vsk, psk);
 	if (ret) {
@@ -501,6 +516,9 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
 	vsk->transport = new_transport;
 
 	return 0;
+err:
+	mutex_unlock(&vsock_register_mutex);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(vsock_assign_transport);
 

From 07a50aedff303e19b11c2cce01f28035c62407a7 Mon Sep 17 00:00:00 2001
From: Jonathan Maple <jmaple@ciq.com>
Date: Wed, 24 Sep 2025 16:04:47 -0400
Subject: [PATCH 3/6] do_change_type(): refuse to operate on unmounted/not ours
 mounts

jira VULN-98607
jira VULN-98606
cve CVE-2025-38498
commit-author Al Viro <viro@zeniv.linux.org.uk>
commit 12f147ddd6de7382dad54812e65f3f08d05809fc

Ensure that propagation settings can only be changed for mounts located
in the caller's mount namespace. This change aligns permission checking
with the rest of mount(2).

	Reviewed-by: Christian Brauner <brauner@kernel.org>
Fixes: 07b20889e305 ("beginning of the shared-subtree proper")
	Reported-by: "Orlando, Noah" <Noah.Orlando@deshaw.com>
	Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
(cherry picked from commit 12f147ddd6de7382dad54812e65f3f08d05809fc)
	Signed-off-by: Jonathan Maple <jmaple@ciq.com>
---
 fs/namespace.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/namespace.c b/fs/namespace.c
index 487417f606c93..8d5d50e2d2ff9 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2290,6 +2290,10 @@ static int do_change_type(struct path *path, int ms_flags)
 		return -EINVAL;
 
 	namespace_lock();
+	if (!check_mnt(mnt)) {
+		err = -EINVAL;
+		goto out_unlock;
+	}
 	if (type == MS_SHARED) {
 		err = invent_group_ids(mnt, recurse);
 		if (err)

From 70d4b79763c3a31ef086f9e4154c9f0321a24c9a Mon Sep 17 00:00:00 2001
From: Jonathan Maple <jmaple@ciq.com>
Date: Wed, 24 Sep 2025 16:11:22 -0400
Subject: [PATCH 4/6] bpf: Fix a segment issue when downgrading gso_size

jira VULN-38750
jira VULN-38751
cve CVE-2024-42281
commit-author Fred Li <dracodingfly@gmail.com>
commit fa5ef655615a01533035c6139248c5b33aa27028

Linearize the skb when downgrading gso_size because it may trigger a
BUG_ON() later when the skb is segmented as described in [1,2].

Fixes: 2be7e212d5419 ("bpf: add bpf_skb_adjust_room helper")
	Signed-off-by: Fred Li <dracodingfly@gmail.com>
	Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
	Reviewed-by: Willem de Bruijn <willemb@google.com>
	Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/all/20240626065555.35460-2-dracodingfly@gmail.com [1]
Link: https://lore.kernel.org/all/668d5cf1ec330_1c18c32947@willemb.c.googlers.com.notmuch [2]
Link: https://lore.kernel.org/bpf/20240719024653.77006-1-dracodingfly@gmail.com
(cherry picked from commit fa5ef655615a01533035c6139248c5b33aa27028)
	Signed-off-by: Jonathan Maple <jmaple@ciq.com>
---
 net/core/filter.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 06c6ff7fb511b..2bffec483f26e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3497,13 +3497,20 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
 	if (skb_is_gso(skb)) {
 		struct skb_shared_info *shinfo = skb_shinfo(skb);
 
-		/* Due to header grow, MSS needs to be downgraded. */
-		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
-			skb_decrease_gso_size(shinfo, len_diff);
-
 		/* Header must be checked, and gso_segs recomputed. */
 		shinfo->gso_type |= gso_type;
 		shinfo->gso_segs = 0;
+
+		/* Due to header growth, MSS needs to be downgraded.
+		 * There is a BUG_ON() when segmenting the frag_list with
+		 * head_frag true, so linearize the skb after downgrading
+		 * the MSS.
+		 */
+		if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO)) {
+			skb_decrease_gso_size(shinfo, len_diff);
+			if (shinfo->frag_list)
+				return skb_linearize(skb);
+		}
 	}
 
 	return 0;

From 6436952e059edea9a8b90758e24e2cfd766f6930 Mon Sep 17 00:00:00 2001
From: Jonathan Maple <jmaple@ciq.com>
Date: Thu, 25 Sep 2025 13:36:44 -0400
Subject: [PATCH 5/6] net: fix udp gso skb_segment after pull from frag_list

jira VULN-156444
jira VULN-156445
cve CVE-2025-38124
commit-author Shiming Cheng <shiming.cheng@mediatek.com>
commit 3382a1ed7f778db841063f5d7e317ac55f9e7f72

Commit a1e40ac5b5e9 ("net: gso: fix udp gso fraglist segmentation after
pull from frag_list") detected invalid geometry in frag_list skbs and
redirects them from skb_segment_list to more robust skb_segment. But some
packets with modified geometry can also hit bugs in that code. We don't
know how many such cases exist. Addressing each one by one also requires
touching the complex skb_segment code, which risks introducing bugs for
other types of skbs. Instead, linearize all these packets that fail the
basic invariants on gso fraglist skbs. That is more robust.

If only part of the fraglist payload is pulled into head_skb, it will
always cause exception when splitting skbs by skb_segment. For detailed
call stack information, see below.

Valid SKB_GSO_FRAGLIST skbs
- consist of two or more segments
- the head_skb holds the protocol headers plus first gso_size
- one or more frag_list skbs hold exactly one segment
- all but the last must be gso_size

Optional datapath hooks such as NAT and BPF (bpf_skb_pull_data) can
modify fraglist skbs, breaking these invariants.

In extreme cases they pull one part of data into skb linear. For UDP,
this  causes three payloads with lengths of (11,11,10) bytes were
pulled tail to become (12,10,10) bytes.

The skbs no longer meets the above SKB_GSO_FRAGLIST conditions because
payload was pulled into head_skb, it needs to be linearized before pass
to regular skb_segment.

    skb_segment+0xcd0/0xd14
    __udp_gso_segment+0x334/0x5f4
    udp4_ufo_fragment+0x118/0x15c
    inet_gso_segment+0x164/0x338
    skb_mac_gso_segment+0xc4/0x13c
    __skb_gso_segment+0xc4/0x124
    validate_xmit_skb+0x9c/0x2c0
    validate_xmit_skb_list+0x4c/0x80
    sch_direct_xmit+0x70/0x404
    __dev_queue_xmit+0x64c/0xe5c
    neigh_resolve_output+0x178/0x1c4
    ip_finish_output2+0x37c/0x47c
    __ip_finish_output+0x194/0x240
    ip_finish_output+0x20/0xf4
    ip_output+0x100/0x1a0
    NF_HOOK+0xc4/0x16c
    ip_forward+0x314/0x32c
    ip_rcv+0x90/0x118
    __netif_receive_skb+0x74/0x124
    process_backlog+0xe8/0x1a4
    __napi_poll+0x5c/0x1f8
    net_rx_action+0x154/0x314
    handle_softirqs+0x154/0x4b8

    [118.376811] [C201134] rxq0_pus: [name:bug&]kernel BUG at net/core/skbuff.c:4278!
    [118.376829] [C201134] rxq0_pus: [name:traps&]Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP
    [118.470774] [C201134] rxq0_pus: [name:mrdump&]Kernel Offset: 0x178cc00000 from 0xffffffc008000000
    [118.470810] [C201134] rxq0_pus: [name:mrdump&]PHYS_OFFSET: 0x40000000
    [118.470827] [C201134] rxq0_pus: [name:mrdump&]pstate: 60400005 (nZCv daif +PAN -UAO)
    [118.470848] [C201134] rxq0_pus: [name:mrdump&]pc : [0xffffffd79598aefc] skb_segment+0xcd0/0xd14
    [118.470900] [C201134] rxq0_pus: [name:mrdump&]lr : [0xffffffd79598a5e8] skb_segment+0x3bc/0xd14
    [118.470928] [C201134] rxq0_pus: [name:mrdump&]sp : ffffffc008013770

Fixes: a1e40ac5b5e9 ("gso: fix udp gso fraglist segmentation after pull from frag_list")
	Signed-off-by: Shiming Cheng <shiming.cheng@mediatek.com>
	Reviewed-by: Willem de Bruijn <willemb@google.com>
	Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 3382a1ed7f778db841063f5d7e317ac55f9e7f72)
	Signed-off-by: Jonathan Maple <jmaple@ciq.com>
---
 net/ipv4/udp_offload.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 89b388d912c93..8f1464cf3a378 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -276,12 +276,17 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
 	bool copy_dtor;
 	__sum16 check;
 	__be16 newlen;
+	int ret = 0;
 
 	if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) {
 		 /* Detect modified geometry and pass those to skb_segment. */
 		if (skb_pagelen(gso_skb) - sizeof(*uh) == skb_shinfo(gso_skb)->gso_size)
 			return __udp_gso_segment_list(gso_skb, features, is_ipv6);
 
+		ret = __skb_linearize(gso_skb);
+		if (ret)
+			return ERR_PTR(ret);
+
 		 /* Setup csum, as fraglist skips this in udp4_gro_receive. */
 		gso_skb->csum_start = skb_transport_header(gso_skb) - gso_skb->head;
 		gso_skb->csum_offset = offsetof(struct udphdr, check);

From 106adb1d0a8fc3fb9f0e12e5522a69458b879734 Mon Sep 17 00:00:00 2001
From: Jonathan Maple <jmaple@ciq.com>
Date: Thu, 25 Sep 2025 14:35:31 -0400
Subject: [PATCH 6/6] use uniform permission checks for all mount propagation
 changes

jira VULN-98606
jira VULN-98607
cve-bf CVE-2025-38498
commit-author Al Viro <viro@zeniv.linux.org.uk>
commit cffd0441872e7f6b1fce5e78fb1c99187a291330
upstream-diff This is a partial backport due to missing 9ffb14ef61ba
	which introduces do_set_group(). This is where the below
	operation on diffferent aspects of the same thing is introduced.
	This change does however introduce some additional safety checks
	which should be considered.

do_change_type() and do_set_group() are operating on different
aspects of the same thing - propagation graph.  The latter
asks for mounts involved to be mounted in namespace(s) the caller
has CAP_SYS_ADMIN for.  The former is a mess - originally it
didn't even check that mount *is* mounted.  That got fixed,
but the resulting check turns out to be too strict for userland -
in effect, we check that mount is in our namespace, having already
checked that we have CAP_SYS_ADMIN there.

What we really need (in both cases) is
	* only touch mounts that are mounted.  That's a must-have
constraint - data corruption happens if it get violated.
	* don't allow to mess with a namespace unless you already
have enough permissions to do so (i.e. CAP_SYS_ADMIN in its userns).

That's an equivalent of what do_set_group() does; let's extract that
into a helper (may_change_propagation()) and use it in both
do_set_group() and do_change_type().

Fixes: 12f147ddd6de "do_change_type(): refuse to operate on unmounted/not ours mounts"
	Acked-by: Andrei Vagin <avagin@gmail.com>
	Reviewed-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
	Tested-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
	Reviewed-by: Christian Brauner <brauner@kernel.org>
	Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
(cherry picked from commit cffd0441872e7f6b1fce5e78fb1c99187a291330)
	Signed-off-by: Jonathan Maple <jmaple@ciq.com>
---
 fs/namespace.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 8d5d50e2d2ff9..57e5f4b63d6b8 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2254,6 +2254,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
 	return attach_recursive_mnt(mnt, p, mp, NULL);
 }
 
+static int may_change_propagation(const struct mount *m)
+{
+	struct mnt_namespace *ns = m->mnt_ns;
+
+        // it must be mounted in some namespace
+        if (IS_ERR_OR_NULL(ns))         // is_mounted()
+                return -EINVAL;
+        // and the caller must be admin in userns of that namespace
+        if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
+                return -EPERM;
+        return 0;
+}
+
 /*
  * Sanity check the flags to change_mnt_propagation.
  */
@@ -2290,10 +2303,10 @@ static int do_change_type(struct path *path, int ms_flags)
 		return -EINVAL;
 
 	namespace_lock();
-	if (!check_mnt(mnt)) {
-		err = -EINVAL;
+	err = may_change_propagation(mnt);
+	if (err)
 		goto out_unlock;
-	}
+
 	if (type == MS_SHARED) {
 		err = invent_group_ids(mnt, recurse);
 		if (err)