From e9906681aa59356770a0f5ffd720e475e1fa9733 Mon Sep 17 00:00:00 2001 From: haodongdong Date: Mon, 20 Jan 2025 11:09:02 +0800 Subject: [PATCH 01/85] scsi: leapioraid: update leapioraid driver version update leapioraid driver version Signed-off-by: haodongdong --- drivers/scsi/leapioraid/leapioraid_func.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/leapioraid/leapioraid_func.h b/drivers/scsi/leapioraid/leapioraid_func.h index 8ca8fc0a6f262..76babcb40766c 100644 --- a/drivers/scsi/leapioraid/leapioraid_func.h +++ b/drivers/scsi/leapioraid/leapioraid_func.h @@ -67,10 +67,10 @@ #define LEAPIORAID_DRIVER_NAME "LeapIoRaid" #define LEAPIORAID_AUTHOR "LeapIO Inc." #define LEAPIORAID_DESCRIPTION "LEAPIO RAID Driver" -#define LEAPIORAID_DRIVER_VERSION "1.00.00.00" +#define LEAPIORAID_DRIVER_VERSION "1.02.02.00" #define LEAPIORAID_MAJOR_VERSION (1) -#define LEAPIORAID_MINOR_VERSION (00) -#define LEAPIORAID_BUILD_VERSION (00) +#define LEAPIORAID_MINOR_VERSION (02) +#define LEAPIORAID_BUILD_VERSION (02) #define LEAPIORAID_RELEASE_VERSION (00) #define LEAPIORAID_VENDOR_ID (0xD405) From 33ba92c14b6d589aba7acff4a3129df915f6690f Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 22 Dec 2023 12:39:57 -0800 Subject: [PATCH 02/85] platform/x86: intel-uncore-freq: Add additional client processors mainline inclusion from mainline-v6.8-rc1 category: feature Add support for client processors starting from Kaby Lake. Signed-off-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20231222203957.1348043-1-srinivas.pandruvada@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede (cherry picked from commit 27f2b08735c90d0f6bd5888edb58bbce7fcf22a8) --- .../x86/intel/uncore-frequency/uncore-frequency.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c index c68e69d7b242c..d15db9d7ac6f4 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c @@ -230,6 +230,16 @@ static const struct x86_cpu_id intel_uncore_cpu_ids[] = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, NULL), X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, NULL), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, NULL), + X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, NULL), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, NULL), + X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, NULL), + X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, NULL), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, NULL), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, NULL), + X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, NULL), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, NULL), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, NULL), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, NULL), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, NULL), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, NULL), @@ -237,6 +247,9 @@ static const struct x86_cpu_id intel_uncore_cpu_ids[] = { X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, NULL), X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, NULL), X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, NULL), + X86_MATCH_INTEL_FAM6_MODEL(ARROWLAKE, NULL), + X86_MATCH_INTEL_FAM6_MODEL(ARROWLAKE_H, NULL), + X86_MATCH_INTEL_FAM6_MODEL(LUNARLAKE_M, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_uncore_cpu_ids); From 03ba367d580503f898c51cc4f5adb65272daaf6c Mon Sep 17 00:00:00 2001 From: Yu Ma Date: Wed, 17 Jul 2024 10:50:16 -0400 Subject: [PATCH 03/85] fs/file.c: remove sanity_check and add likely/unlikely in alloc_fd() mainline inclusion from mainline-v6.13-rc1 category: performance alloc_fd() has a sanity check inside to make sure the struct file mapping to the allocated fd is NULL. Remove this sanity check since it can be assured by exisitng zero initilization and NULL set when recycling fd. Meanwhile, add likely/unlikely and expand_file() call avoidance to reduce the work under file_lock. Reviewed-by: Jan Kara Reviewed-by: Tim Chen Signed-off-by: Yu Ma Link: https://lore.kernel.org/r/20240717145018.3972922-2-yu.ma@intel.com Signed-off-by: Christian Brauner Signed-off-by: Al Viro (cherry picked from commit 52732bb9abc9ee5b82ed62edef51be4a255fc78a) --- fs/file.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/fs/file.c b/fs/file.c index 32efaebea672a..4926670576d4e 100644 --- a/fs/file.c +++ b/fs/file.c @@ -491,7 +491,7 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags) if (fd < files->next_fd) fd = files->next_fd; - if (fd < fdt->max_fds) + if (likely(fd < fdt->max_fds)) fd = find_next_fd(fdt, fd); /* @@ -499,19 +499,21 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags) * will limit the total number of files that can be opened. */ error = -EMFILE; - if (fd >= end) + if (unlikely(fd >= end)) goto out; - error = expand_files(files, fd); - if (error < 0) - goto out; + if (unlikely(fd >= fdt->max_fds)) { + error = expand_files(files, fd); + if (error < 0) + goto out; - /* - * If we needed to expand the fs array we - * might have blocked - try again. - */ - if (error) - goto repeat; + /* + * If we needed to expand the fs array we + * might have blocked - try again. + */ + if (error) + goto repeat; + } if (start <= files->next_fd) files->next_fd = fd + 1; @@ -522,13 +524,6 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags) else __clear_close_on_exec(fd, fdt); error = fd; -#if 1 - /* Sanity check */ - if (rcu_access_pointer(fdt->fd[fd]) != NULL) { - printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); - rcu_assign_pointer(fdt->fd[fd], NULL); - } -#endif out: spin_unlock(&files->file_lock); @@ -591,7 +586,7 @@ void fd_install(unsigned int fd, struct file *file) rcu_read_unlock_sched(); spin_lock(&files->file_lock); fdt = files_fdtable(files); - BUG_ON(fdt->fd[fd] != NULL); + WARN_ON(fdt->fd[fd] != NULL); rcu_assign_pointer(fdt->fd[fd], file); spin_unlock(&files->file_lock); return; From c174854736b4d2c85d5a4f22de605b0a3bf81420 Mon Sep 17 00:00:00 2001 From: Yu Ma Date: Wed, 17 Jul 2024 10:50:17 -0400 Subject: [PATCH 04/85] fs/file.c: conditionally clear full_fds mainline inclusion from mainline-v6.13-rc1 category: performance 64 bits in open_fds are mapped to a common bit in full_fds_bits. It is very likely that a bit in full_fds_bits has been cleared before in __clear_open_fds()'s operation. Check the clear bit in full_fds_bits before clearing to avoid unnecessary write and cache bouncing. See commit fc90888d07b8 ("vfs: conditionally clear close-on-exec flag") for a similar optimization. take stock kernel with patch 1 as baseline, it improves pts/blogbench-1.1.0 read for 13%, and write for 5% on Intel ICX 160 cores configuration with v6.10-rc7. Reviewed-by: Jan Kara Reviewed-by: Tim Chen Signed-off-by: Yu Ma Link: https://lore.kernel.org/r/20240717145018.3972922-3-yu.ma@intel.com Signed-off-by: Christian Brauner Signed-off-by: Al Viro (cherry picked from commit c9a3019603b8a8519f1b6d8ae0059bcb2965f8fe) --- fs/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/file.c b/fs/file.c index 4926670576d4e..cd378a1bcea04 100644 --- a/fs/file.c +++ b/fs/file.c @@ -264,7 +264,9 @@ static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt) static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt) { __clear_bit(fd, fdt->open_fds); - __clear_bit(fd / BITS_PER_LONG, fdt->full_fds_bits); + fd /= BITS_PER_LONG; + if (test_bit(fd, fdt->full_fds_bits)) + __clear_bit(fd, fdt->full_fds_bits); } /* From fcdc75e4e2c1beb7741bbb6da9bb4e825c679802 Mon Sep 17 00:00:00 2001 From: Yu Ma Date: Wed, 17 Jul 2024 10:50:18 -0400 Subject: [PATCH 05/85] fs/file.c: add fast path in find_next_fd() mainline inclusion from mainline-v6.13-rc1 category: performance Skip 2-levels searching via find_next_zero_bit() when there is free slot in the word contains next_fd, as: (1) next_fd indicates the lower bound for the first free fd. (2) There is fast path inside of find_next_zero_bit() when size<=64 to speed up searching. (3) After fdt is expanded (the bitmap size doubled for each time of expansion), it would never be shrunk. The search size increases but there are few open fds available here. This fast path is proposed by Mateusz Guzik , and agreed by Jan Kara , which is more generic and scalable than previous versions. And on top of patch 1 and 2, it improves pts/blogbench-1.1.0 read by 8% and write by 4% on Intel ICX 160 cores configuration with v6.10-rc7. Reviewed-by: Jan Kara Reviewed-by: Tim Chen Signed-off-by: Yu Ma Link: https://lore.kernel.org/r/20240717145018.3972922-4-yu.ma@intel.com Signed-off-by: Christian Brauner Signed-off-by: Al Viro (cherry picked from commit 0c40bf47cf2d9e1413b1e62826c89c2341e66e40) --- fs/file.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/file.c b/fs/file.c index cd378a1bcea04..225cd0ad50c31 100644 --- a/fs/file.c +++ b/fs/file.c @@ -467,6 +467,15 @@ static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start) unsigned int maxfd = fdt->max_fds; /* always multiple of BITS_PER_LONG */ unsigned int maxbit = maxfd / BITS_PER_LONG; unsigned int bitbit = start / BITS_PER_LONG; + unsigned int bit; + + /* + * Try to avoid looking at the second level bitmap + */ + bit = find_next_zero_bit(&fdt->open_fds[bitbit], BITS_PER_LONG, + start & (BITS_PER_LONG - 1)); + if (bit < BITS_PER_LONG) + return bit + bitbit * BITS_PER_LONG; bitbit = find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) * BITS_PER_LONG; if (bitbit >= maxfd) From bfaf16ca7d6dbf05c2c8868746091fa79f86af72 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 21 Jan 2025 09:13:23 +0100 Subject: [PATCH 06/85] Revert "ovl: support encoding fid from inode with no alias" This reverts commit a1a541fbfa7e97c1100144db34b57553d7164ce5 which is commit c45beebfde34aa71afbc48b2c54cdda623515037 upstream. It is reported to part of a series that causes problems in the 6.6.y tree, so revert it at this point in time and it can come back later if still needed. Reported-by: Ignat Korchagin Link: https://lore.kernel.org/r/ACD4D6CC-C4D5-4657-A805-03C34559046E@cloudflare.com Cc: Dmitry Safonov Cc: Amir Goldstein Cc: Christian Brauner Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 950b604384fd75d62e860bec7135b2b62eb4d508) --- fs/overlayfs/export.c | 46 ++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 3a17e4366f28c..c56e4e0b8054c 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -181,37 +181,35 @@ static int ovl_connect_layer(struct dentry *dentry) * * Return 0 for upper file handle, > 0 for lower file handle or < 0 on error. */ -static int ovl_check_encode_origin(struct inode *inode) +static int ovl_check_encode_origin(struct dentry *dentry) { - struct ovl_fs *ofs = OVL_FS(inode->i_sb); + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); bool decodable = ofs->config.nfs_export; - struct dentry *dentry; - int err; /* No upper layer? */ if (!ovl_upper_mnt(ofs)) return 1; /* Lower file handle for non-upper non-decodable */ - if (!ovl_inode_upper(inode) && !decodable) + if (!ovl_dentry_upper(dentry) && !decodable) return 1; /* Upper file handle for pure upper */ - if (!ovl_inode_lower(inode)) + if (!ovl_dentry_lower(dentry)) return 0; /* * Root is never indexed, so if there's an upper layer, encode upper for * root. */ - if (inode == d_inode(inode->i_sb->s_root)) + if (dentry == dentry->d_sb->s_root) return 0; /* * Upper decodable file handle for non-indexed upper. */ - if (ovl_inode_upper(inode) && decodable && - !ovl_test_flag(OVL_INDEX, inode)) + if (ovl_dentry_upper(dentry) && decodable && + !ovl_test_flag(OVL_INDEX, d_inode(dentry))) return 0; /* @@ -220,25 +218,17 @@ static int ovl_check_encode_origin(struct inode *inode) * ovl_connect_layer() will try to make origin's layer "connected" by * copying up a "connectable" ancestor. */ - if (!decodable || !S_ISDIR(inode->i_mode)) - return 1; - - dentry = d_find_any_alias(inode); - if (!dentry) - return -ENOENT; - - err = ovl_connect_layer(dentry); - dput(dentry); - if (err < 0) - return err; + if (d_is_dir(dentry) && decodable) + return ovl_connect_layer(dentry); /* Lower file handle for indexed and non-upper dir/non-dir */ return 1; } -static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct inode *inode, +static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry, u32 *fid, int buflen) { + struct inode *inode = d_inode(dentry); struct ovl_fh *fh = NULL; int err, enc_lower; int len; @@ -247,7 +237,7 @@ static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct inode *inode, * Check if we should encode a lower or upper file handle and maybe * copy up an ancestor to make lower file handle connectable. */ - err = enc_lower = ovl_check_encode_origin(inode); + err = enc_lower = ovl_check_encode_origin(dentry); if (enc_lower < 0) goto fail; @@ -267,8 +257,8 @@ static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct inode *inode, return err; fail: - pr_warn_ratelimited("failed to encode file handle (ino=%lu, err=%i)\n", - inode->i_ino, err); + pr_warn_ratelimited("failed to encode file handle (%pd2, err=%i)\n", + dentry, err); goto out; } @@ -276,13 +266,19 @@ static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len, struct inode *parent) { struct ovl_fs *ofs = OVL_FS(inode->i_sb); + struct dentry *dentry; int bytes, buflen = *max_len << 2; /* TODO: encode connectable file handles */ if (parent) return FILEID_INVALID; - bytes = ovl_dentry_to_fid(ofs, inode, fid, buflen); + dentry = d_find_any_alias(inode); + if (!dentry) + return FILEID_INVALID; + + bytes = ovl_dentry_to_fid(ofs, dentry, fid, buflen); + dput(dentry); if (bytes <= 0) return FILEID_INVALID; From e96df865e18b263842b28383a8ed485c1afa085a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 21 Jan 2025 09:13:38 +0100 Subject: [PATCH 07/85] Revert "ovl: pass realinode to ovl_encode_real_fh() instead of realdentry" This reverts commit a3f8a2b13a277d942c810d2ccc654d5bc824a430 which is commit 07aeefae7ff44d80524375253980b1bdee2396b0 upstream. It is reported to part of a series that causes problems in the 6.6.y tree, so revert it at this point in time and it can come back later if still needed. Reported-by: Ignat Korchagin Link: https://lore.kernel.org/r/ACD4D6CC-C4D5-4657-A805-03C34559046E@cloudflare.com Cc: Dmitry Safonov Cc: Amir Goldstein Cc: Christian Brauner Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman (cherry picked from commit d1c53de4463be3b1b6fbeaefe7ccb1a7f8f33de7) --- fs/overlayfs/copy_up.c | 11 +++++------ fs/overlayfs/export.c | 5 ++--- fs/overlayfs/namei.c | 4 ++-- fs/overlayfs/overlayfs.h | 2 +- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index f14c412c56097..5c9af24bae4aa 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -371,13 +371,13 @@ int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upperdentry, return err; } -struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct inode *realinode, +struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, bool is_upper) { struct ovl_fh *fh; int fh_type, dwords; int buflen = MAX_HANDLE_SZ; - uuid_t *uuid = &realinode->i_sb->s_uuid; + uuid_t *uuid = &real->d_sb->s_uuid; int err; /* Make sure the real fid stays 32bit aligned */ @@ -394,8 +394,7 @@ struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct inode *realinode, * the price or reconnecting the dentry. */ dwords = buflen >> 2; - fh_type = exportfs_encode_inode_fh(realinode, (void *)fh->fb.fid, - &dwords, NULL, 0); + fh_type = exportfs_encode_fh(real, (void *)fh->fb.fid, &dwords, 0); buflen = (dwords << 2); err = -EIO; @@ -437,7 +436,7 @@ struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin) if (!ovl_can_decode_fh(origin->d_sb)) return NULL; - return ovl_encode_real_fh(ofs, d_inode(origin), false); + return ovl_encode_real_fh(ofs, origin, false); } int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh, @@ -462,7 +461,7 @@ static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper, const struct ovl_fh *fh; int err; - fh = ovl_encode_real_fh(ofs, d_inode(upper), true); + fh = ovl_encode_real_fh(ofs, upper, true); if (IS_ERR(fh)) return PTR_ERR(fh); diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index c56e4e0b8054c..611ff567a1aa6 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -228,7 +228,6 @@ static int ovl_check_encode_origin(struct dentry *dentry) static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry, u32 *fid, int buflen) { - struct inode *inode = d_inode(dentry); struct ovl_fh *fh = NULL; int err, enc_lower; int len; @@ -242,8 +241,8 @@ static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry, goto fail; /* Encode an upper or lower file handle */ - fh = ovl_encode_real_fh(ofs, enc_lower ? ovl_inode_lower(inode) : - ovl_inode_upper(inode), !enc_lower); + fh = ovl_encode_real_fh(ofs, enc_lower ? ovl_dentry_lower(dentry) : + ovl_dentry_upper(dentry), !enc_lower); if (IS_ERR(fh)) return PTR_ERR(fh); diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 2d2ef671b36ba..f10ac4ae35f0a 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -536,7 +536,7 @@ int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry, struct ovl_fh *fh; int err; - fh = ovl_encode_real_fh(ofs, d_inode(real), is_upper); + fh = ovl_encode_real_fh(ofs, real, is_upper); err = PTR_ERR(fh); if (IS_ERR(fh)) { fh = NULL; @@ -732,7 +732,7 @@ int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin, struct ovl_fh *fh; int err; - fh = ovl_encode_real_fh(ofs, d_inode(origin), false); + fh = ovl_encode_real_fh(ofs, origin, false); if (IS_ERR(fh)) return PTR_ERR(fh); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index ca63a26a6170b..61e03d664d7d7 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -832,7 +832,7 @@ int ovl_copy_up_with_data(struct dentry *dentry); int ovl_maybe_copy_up(struct dentry *dentry, int flags); int ovl_copy_xattr(struct super_block *sb, const struct path *path, struct dentry *new); int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upper, struct kstat *stat); -struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct inode *realinode, +struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, bool is_upper); struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin); int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh, From aa4380e04da9015cae91859adeb9a2c9fd9c2652 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 21 Jan 2025 09:13:50 +0100 Subject: [PATCH 08/85] Revert "ovl: do not encode lower fh with upper sb_writers held" This reverts commit 26423e18cd6f709ca4fe7194c29c11658cd0cdd0 which is commit 5b02bfc1e7e3811c5bf7f0fa626a0694d0dbbd77 upstream. It is reported to part of a series that causes problems in the 6.6.y tree, so revert it at this point in time and it can come back later if still needed. Reported-by: Ignat Korchagin Link: https://lore.kernel.org/r/ACD4D6CC-C4D5-4657-A805-03C34559046E@cloudflare.com Cc: Dmitry Safonov Cc: Amir Goldstein Cc: Christian Brauner Cc: Sasha Levin Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 1795ca6571199008f67afa9e88cddf199ddde9ca) --- fs/overlayfs/copy_up.c | 53 +++++++++++++++------------------------- fs/overlayfs/namei.c | 37 +++++++--------------------- fs/overlayfs/overlayfs.h | 26 ++++++-------------- fs/overlayfs/super.c | 20 +++++---------- fs/overlayfs/util.c | 10 -------- 5 files changed, 42 insertions(+), 104 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 5c9af24bae4aa..ada3fcc9c6d50 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -426,29 +426,29 @@ struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, return ERR_PTR(err); } -struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin) +int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower, + struct dentry *upper) { + const struct ovl_fh *fh = NULL; + int err; + /* * When lower layer doesn't support export operations store a 'null' fh, * so we can use the overlay.origin xattr to distignuish between a copy * up and a pure upper inode. */ - if (!ovl_can_decode_fh(origin->d_sb)) - return NULL; - - return ovl_encode_real_fh(ofs, origin, false); -} - -int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh, - struct dentry *upper) -{ - int err; + if (ovl_can_decode_fh(lower->d_sb)) { + fh = ovl_encode_real_fh(ofs, lower, false); + if (IS_ERR(fh)) + return PTR_ERR(fh); + } /* * Do not fail when upper doesn't support xattrs. */ err = ovl_check_setxattr(ofs, upper, OVL_XATTR_ORIGIN, fh->buf, fh ? fh->fb.len : 0, 0); + kfree(fh); /* Ignore -EPERM from setting "user.*" on symlink/special */ return err == -EPERM ? 0 : err; @@ -476,7 +476,7 @@ static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper, * * Caller must hold i_mutex on indexdir. */ -static int ovl_create_index(struct dentry *dentry, const struct ovl_fh *fh, +static int ovl_create_index(struct dentry *dentry, struct dentry *origin, struct dentry *upper) { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); @@ -502,7 +502,7 @@ static int ovl_create_index(struct dentry *dentry, const struct ovl_fh *fh, if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry)))) return -EIO; - err = ovl_get_index_name_fh(fh, &name); + err = ovl_get_index_name(ofs, origin, &name); if (err) return err; @@ -541,7 +541,6 @@ struct ovl_copy_up_ctx { struct dentry *destdir; struct qstr destname; struct dentry *workdir; - const struct ovl_fh *origin_fh; bool origin; bool indexed; bool metacopy; @@ -638,7 +637,7 @@ static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp) * hard link. */ if (c->origin) { - err = ovl_set_origin_fh(ofs, c->origin_fh, temp); + err = ovl_set_origin(ofs, c->lowerpath.dentry, temp); if (err) return err; } @@ -750,7 +749,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) goto cleanup; if (S_ISDIR(c->stat.mode) && c->indexed) { - err = ovl_create_index(c->dentry, c->origin_fh, temp); + err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp); if (err) goto cleanup; } @@ -862,8 +861,6 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) { int err; struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); - struct dentry *origin = c->lowerpath.dentry; - struct ovl_fh *fh = NULL; bool to_index = false; /* @@ -880,25 +877,17 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) to_index = true; } - if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index) { - fh = ovl_get_origin_fh(ofs, origin); - if (IS_ERR(fh)) - return PTR_ERR(fh); - - /* origin_fh may be NULL */ - c->origin_fh = fh; + if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index) c->origin = true; - } if (to_index) { c->destdir = ovl_indexdir(c->dentry->d_sb); - err = ovl_get_index_name(ofs, origin, &c->destname); + err = ovl_get_index_name(ofs, c->lowerpath.dentry, &c->destname); if (err) - goto out_free_fh; + return err; } else if (WARN_ON(!c->parent)) { /* Disconnected dentry must be copied up to index dir */ - err = -EIO; - goto out_free_fh; + return -EIO; } else { /* * Mark parent "impure" because it may now contain non-pure @@ -906,7 +895,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) */ err = ovl_set_impure(c->parent, c->destdir); if (err) - goto out_free_fh; + return err; } /* Should we copyup with O_TMPFILE or with workdir? */ @@ -938,8 +927,6 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) out: if (to_index) kfree(c->destname.name); -out_free_fh: - kfree(fh); return err; } diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index f10ac4ae35f0a..80391c687c2ad 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -507,19 +507,6 @@ static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry, return err; } -int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, - enum ovl_xattr ox, const struct ovl_fh *fh, - bool is_upper, bool set) -{ - int err; - - err = ovl_verify_fh(ofs, dentry, ox, fh); - if (set && err == -ENODATA) - err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len); - - return err; -} - /* * Verify that @real dentry matches the file handle stored in xattr @name. * @@ -528,9 +515,9 @@ int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, * * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error. */ -int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry, - enum ovl_xattr ox, struct dentry *real, - bool is_upper, bool set) +int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, + enum ovl_xattr ox, struct dentry *real, bool is_upper, + bool set) { struct inode *inode; struct ovl_fh *fh; @@ -543,7 +530,9 @@ int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry, goto fail; } - err = ovl_verify_set_fh(ofs, dentry, ox, fh, is_upper, set); + err = ovl_verify_fh(ofs, dentry, ox, fh); + if (set && err == -ENODATA) + err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len); if (err) goto fail; @@ -559,7 +548,6 @@ int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry, goto out; } - /* Get upper dentry from index */ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index, bool connected) @@ -696,7 +684,7 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) goto out; } -int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name) +static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name) { char *n, *s; @@ -885,27 +873,20 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path) static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry, struct dentry *lower, struct dentry *upper) { - const struct ovl_fh *fh; int err; if (ovl_check_origin_xattr(ofs, upper)) return 0; - fh = ovl_get_origin_fh(ofs, lower); - if (IS_ERR(fh)) - return PTR_ERR(fh); - err = ovl_want_write(dentry); if (err) - goto out; + return err; - err = ovl_set_origin_fh(ofs, fh, upper); + err = ovl_set_origin(ofs, lower, upper); if (!err) err = ovl_set_impure(dentry->d_parent, upper->d_parent); ovl_drop_write(dentry); -out: - kfree(fh); return err; } diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 61e03d664d7d7..09ca82ed0f8ce 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -632,15 +632,11 @@ struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh, int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, struct dentry *upperdentry, struct ovl_path **stackp); int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, - enum ovl_xattr ox, const struct ovl_fh *fh, - bool is_upper, bool set); -int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry, - enum ovl_xattr ox, struct dentry *real, - bool is_upper, bool set); + enum ovl_xattr ox, struct dentry *real, bool is_upper, + bool set); struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index, bool connected); int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index); -int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name); int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin, struct qstr *name); struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh); @@ -652,24 +648,17 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); bool ovl_lower_positive(struct dentry *dentry); -static inline int ovl_verify_origin_fh(struct ovl_fs *ofs, struct dentry *upper, - const struct ovl_fh *fh, bool set) -{ - return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, fh, false, set); -} - static inline int ovl_verify_origin(struct ovl_fs *ofs, struct dentry *upper, struct dentry *origin, bool set) { - return ovl_verify_origin_xattr(ofs, upper, OVL_XATTR_ORIGIN, origin, - false, set); + return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, origin, + false, set); } static inline int ovl_verify_upper(struct ovl_fs *ofs, struct dentry *index, struct dentry *upper, bool set) { - return ovl_verify_origin_xattr(ofs, index, OVL_XATTR_UPPER, upper, - true, set); + return ovl_verify_set_fh(ofs, index, OVL_XATTR_UPPER, upper, true, set); } /* readdir.c */ @@ -834,9 +823,8 @@ int ovl_copy_xattr(struct super_block *sb, const struct path *path, struct dentr int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upper, struct kstat *stat); struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, bool is_upper); -struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin); -int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh, - struct dentry *upper); +int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower, + struct dentry *upper); /* export.c */ extern const struct export_operations ovl_export_operations; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index e2574034c3fa1..2c056d737c27c 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -879,20 +879,15 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, { struct vfsmount *mnt = ovl_upper_mnt(ofs); struct dentry *indexdir; - struct dentry *origin = ovl_lowerstack(oe)->dentry; - const struct ovl_fh *fh; int err; - fh = ovl_get_origin_fh(ofs, origin); - if (IS_ERR(fh)) - return PTR_ERR(fh); - err = mnt_want_write(mnt); if (err) - goto out_free_fh; + return err; /* Verify lower root is upper root origin */ - err = ovl_verify_origin_fh(ofs, upperpath->dentry, fh, true); + err = ovl_verify_origin(ofs, upperpath->dentry, + ovl_lowerstack(oe)->dentry, true); if (err) { pr_err("failed to verify upper root origin\n"); goto out; @@ -924,10 +919,9 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, * directory entries. */ if (ovl_check_origin_xattr(ofs, ofs->indexdir)) { - err = ovl_verify_origin_xattr(ofs, ofs->indexdir, - OVL_XATTR_ORIGIN, - upperpath->dentry, true, - false); + err = ovl_verify_set_fh(ofs, ofs->indexdir, + OVL_XATTR_ORIGIN, + upperpath->dentry, true, false); if (err) pr_err("failed to verify index dir 'origin' xattr\n"); } @@ -945,8 +939,6 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, out: mnt_drop_write(mnt); -out_free_fh: - kfree(fh); return err; } diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 4e6b747e0f2e2..0bf3ffcd072f6 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -976,18 +976,12 @@ static void ovl_cleanup_index(struct dentry *dentry) struct dentry *index = NULL; struct inode *inode; struct qstr name = { }; - bool got_write = false; int err; err = ovl_get_index_name(ofs, lowerdentry, &name); if (err) goto fail; - err = ovl_want_write(dentry); - if (err) - goto fail; - - got_write = true; inode = d_inode(upperdentry); if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) { pr_warn_ratelimited("cleanup linked index (%pd2, ino=%lu, nlink=%u)\n", @@ -1025,8 +1019,6 @@ static void ovl_cleanup_index(struct dentry *dentry) goto fail; out: - if (got_write) - ovl_drop_write(dentry); kfree(name.name); dput(index); return; @@ -1097,8 +1089,6 @@ void ovl_nlink_end(struct dentry *dentry) { struct inode *inode = d_inode(dentry); - ovl_drop_write(dentry); - if (ovl_test_flag(OVL_INDEX, inode) && inode->i_nlink == 0) { const struct cred *old_cred; From 9ebc1ce881feba13fe0222ef747fe1df51aab87a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 21 Jan 2025 09:49:10 +0100 Subject: [PATCH 09/85] Linux 6.6.73 Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 3b4299ff7a25480d96c5e9a84b879e5193447d28) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5aed7d46dbfef..a349c26c6fba3 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 72 +SUBLEVEL = 73 EXTRAVERSION = NAME = Pinguïn Aangedreven From c534a5f75723b7c16bdd4a6410cc0dbd40ae75a6 Mon Sep 17 00:00:00 2001 From: Mingcong Bai Date: Thu, 23 Jan 2025 14:23:13 +0800 Subject: [PATCH 10/85] mips: configs: enable SquashFS compression support, OverlayFS Enable compressed SquashFS and OverlayFS support as required by the installation media. Also tune SquashFS defaults to optimise performance. Signed-off-by: Mingcong Bai --- arch/mips/configs/deepin_loongson3_desktop_defconfig | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/mips/configs/deepin_loongson3_desktop_defconfig b/arch/mips/configs/deepin_loongson3_desktop_defconfig index 76cb956c8d801..b352c46cf43a5 100644 --- a/arch/mips/configs/deepin_loongson3_desktop_defconfig +++ b/arch/mips/configs/deepin_loongson3_desktop_defconfig @@ -351,6 +351,7 @@ CONFIG_QFMT_V2=m CONFIG_AUTOFS_FS=y CONFIG_FUSE_FS=m CONFIG_VIRTIO_FS=m +CONFIG_OVERLAY_FS=y CONFIG_FSCACHE=m CONFIG_ISO9660_FS=m CONFIG_JOLIET=y @@ -364,7 +365,13 @@ CONFIG_TMPFS_POSIX_ACL=y CONFIG_CONFIGFS_FS=y CONFIG_CRAMFS=m CONFIG_SQUASHFS=y +CONFIG_SQUASHFS_FILE_DIRECT=y +CONFIG_SQUASHFS_CHOICE_DECOMP_BY_MOUNT=y CONFIG_SQUASHFS_XATTR=y +CONFIG_SQUASHFS_LZ4=y +CONFIG_SQUASHFS_LZO=y +CONFIG_SQUASHFS_XZ=y +CONFIG_SQUASHFS_ZSTD=y CONFIG_NFS_FS=m CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=m From a8875ed488653bf5b75e0bda754f56397fba8933 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Wed, 8 Jan 2025 17:50:28 +0800 Subject: [PATCH 11/85] Bluetooth: btmtk: Fix failed to send func ctrl for MediaTek devices. [ Upstream commit 67dba2c28fe0af7e25ea1aeade677162ed05310a ] Use usb_autopm_get_interface() and usb_autopm_put_interface() in btmtk_usb_shutdown(), it could send func ctrl after enabling autosuspend. Bluetooth: btmtk_usb_hci_wmt_sync() hci0: Execution of wmt command timed out Bluetooth: btmtk_usb_shutdown() hci0: Failed to send wmt func ctrl (-110) Fixes: 5c5e8c52e3ca ("Bluetooth: btmtk: move btusb_mtk_[setup, shutdown] to btmtk.c") Signed-off-by: Chris Lu Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin (cherry picked from commit cd4522bd3632489626d09b3d197fa887cf623c74) --- drivers/bluetooth/btmtk.c | 7 +++++++ net/bluetooth/rfcomm/tty.c | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index d4183f5797720..538147df43cd7 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -1473,10 +1473,15 @@ EXPORT_SYMBOL_GPL(btmtk_usb_setup); int btmtk_usb_shutdown(struct hci_dev *hdev) { + struct btmtk_data *data = hci_get_priv(hdev); struct btmtk_hci_wmt_params wmt_params; u8 param = 0; int err; + err = usb_autopm_get_interface(data->intf); + if (err < 0) + return err; + /* Disable the device */ wmt_params.op = BTMTK_WMT_FUNC_CTRL; wmt_params.flag = 0; @@ -1487,9 +1492,11 @@ int btmtk_usb_shutdown(struct hci_dev *hdev) err = btmtk_usb_hci_wmt_sync(hdev, &wmt_params); if (err < 0) { bt_dev_err(hdev, "Failed to send wmt func ctrl (%d)", err); + usb_autopm_put_interface(data->intf); return err; } + usb_autopm_put_interface(data->intf); return 0; } EXPORT_SYMBOL_GPL(btmtk_usb_shutdown); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 94ec913dfb76e..df02c4201bca3 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -201,14 +201,14 @@ static ssize_t address_show(struct device *tty_dev, struct device_attribute *attr, char *buf) { struct rfcomm_dev *dev = dev_get_drvdata(tty_dev); - return sprintf(buf, "%pMR\n", &dev->dst); + return sysfs_emit(buf, "%pMR\n", &dev->dst); } static ssize_t channel_show(struct device *tty_dev, struct device_attribute *attr, char *buf) { struct rfcomm_dev *dev = dev_get_drvdata(tty_dev); - return sprintf(buf, "%d\n", dev->channel); + return sysfs_emit(buf, "%d\n", dev->channel); } static DEVICE_ATTR_RO(address); From e4d1dfc73e3dc17f370a3f25179a9a3851433b21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BF=98=E6=80=80?= Date: Thu, 23 Jan 2025 01:43:09 +0800 Subject: [PATCH 12/85] [FROMLIST] net: stmmac: Optimize cache prefetch in RX path next inclusion from next-20250122 category: performance Current code prefetches cache lines for the received frame first, and then dma_sync_single_for_cpu() against this frame, this is wrong. Cache prefetch should be triggered after dma_sync_single_for_cpu(). This patch brings ~2.8% driver performance improvement in a TCP RX throughput test with iPerf tool on a single isolated Cortex-A65 CPU core, 2.84 Gbits/sec increased to 2.92 Gbits/sec. Signed-off-by: Furong Xu <0x1207@gmail.com> Reviewed-by: Alexander Lobakin Reviewed-by: Yanteng Si Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 4991110cce12b..4114171eb3c4c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5408,10 +5408,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) /* Buffer is good. Go on. */ - prefetch(page_address(buf->page) + buf->page_offset); - if (buf->sec_page) - prefetch(page_address(buf->sec_page)); - buf1_len = stmmac_rx_buf1_len(priv, p, status, len); len += buf1_len; buf2_len = stmmac_rx_buf2_len(priv, p, status, len); @@ -5433,7 +5429,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) dma_sync_single_for_cpu(priv->device, buf->addr, buf1_len, dma_dir); - + prefetch(page_address(buf->page) + buf->page_offset); xdp_init_buff(&ctx.xdp, buf_sz, &rx_q->xdp_rxq); xdp_prepare_buff(&ctx.xdp, page_address(buf->page), buf->page_offset, buf1_len, true); From 00b34caa1917cb7d8dba398607a2e7ecc5c695cd Mon Sep 17 00:00:00 2001 From: Sudheer Kumar Doredla Date: Wed, 8 Jan 2025 22:54:33 +0530 Subject: [PATCH 13/85] net: ethernet: ti: cpsw_ale: Fix cpsw_ale_get_field() [ Upstream commit 03d120f27d050336f7e7d21879891542c4741f81 ] CPSW ALE has 75-bit ALE entries stored across three 32-bit words. The cpsw_ale_get_field() and cpsw_ale_set_field() functions support ALE field entries spanning up to two words at the most. The cpsw_ale_get_field() and cpsw_ale_set_field() functions work as expected when ALE field spanned across word1 and word2, but fails when ALE field spanned across word2 and word3. For example, while reading the ALE field spanned across word2 and word3 (i.e. bits 62 to 64), the word3 data shifted to an incorrect position due to the index becoming zero while flipping. The same issue occurred when setting an ALE entry. This issue has not been seen in practice but will be an issue in the future if the driver supports accessing ALE fields spanning word2 and word3 Fix the methods to handle getting/setting fields spanning up to two words. Fixes: b685f1a58956 ("net: ethernet: ti: cpsw_ale: Fix cpsw_ale_get_field()/cpsw_ale_set_field()") Signed-off-by: Sudheer Kumar Doredla Reviewed-by: Simon Horman Reviewed-by: Roger Quadros Reviewed-by: Siddharth Vadapalli Link: https://patch.msgid.link/20250108172433.311694-1-s-doredla@ti.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit 97ab30ee4b56df69f31d7e5aefb21c16ff25ece9) --- drivers/net/ethernet/ti/cpsw_ale.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index 64bf22cd860c9..9eccc7064c2b0 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -106,15 +106,15 @@ struct cpsw_ale_dev_id { static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits) { - int idx, idx2; + int idx, idx2, index; u32 hi_val = 0; idx = start / 32; idx2 = (start + bits - 1) / 32; /* Check if bits to be fetched exceed a word */ if (idx != idx2) { - idx2 = 2 - idx2; /* flip */ - hi_val = ale_entry[idx2] << ((idx2 * 32) - start); + index = 2 - idx2; /* flip */ + hi_val = ale_entry[index] << ((idx2 * 32) - start); } start -= idx * 32; idx = 2 - idx; /* flip */ @@ -124,16 +124,16 @@ static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits) static inline void cpsw_ale_set_field(u32 *ale_entry, u32 start, u32 bits, u32 value) { - int idx, idx2; + int idx, idx2, index; value &= BITMASK(bits); idx = start / 32; idx2 = (start + bits - 1) / 32; /* Check if bits to be set exceed a word */ if (idx != idx2) { - idx2 = 2 - idx2; /* flip */ - ale_entry[idx2] &= ~(BITMASK(bits + start - (idx2 * 32))); - ale_entry[idx2] |= (value >> ((idx2 * 32) - start)); + index = 2 - idx2; /* flip */ + ale_entry[index] &= ~(BITMASK(bits + start - (idx2 * 32))); + ale_entry[index] |= (value >> ((idx2 * 32) - start)); } start -= idx * 32; idx = 2 - idx; /* flip */ From 528eb23ff7920736223bed9e841448dc321946b7 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Fri, 10 Jan 2025 14:21:55 +0100 Subject: [PATCH 14/85] bpf: Fix bpf_sk_select_reuseport() memory leak [ Upstream commit b3af60928ab9129befa65e6df0310d27300942bf ] As pointed out in the original comment, lookup in sockmap can return a TCP ESTABLISHED socket. Such TCP socket may have had SO_ATTACH_REUSEPORT_EBPF set before it was ESTABLISHED. In other words, a non-NULL sk_reuseport_cb does not imply a non-refcounted socket. Drop sk's reference in both error paths. unreferenced object 0xffff888101911800 (size 2048): comm "test_progs", pid 44109, jiffies 4297131437 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 80 00 01 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 9336483b): __kmalloc_noprof+0x3bf/0x560 __reuseport_alloc+0x1d/0x40 reuseport_alloc+0xca/0x150 reuseport_attach_prog+0x87/0x140 sk_reuseport_attach_bpf+0xc8/0x100 sk_setsockopt+0x1181/0x1990 do_sock_setsockopt+0x12b/0x160 __sys_setsockopt+0x7b/0xc0 __x64_sys_setsockopt+0x1b/0x30 do_syscall_64+0x93/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e Fixes: 64d85290d79c ("bpf: Allow bpf_map_lookup_elem for SOCKMAP and SOCKHASH") Signed-off-by: Michal Luczaj Reviewed-by: Martin KaFai Lau Link: https://patch.msgid.link/20250110-reuseport-memleak-v1-1-fa1ddab0adfe@rbox.co Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit 29e6db12e92e393fa7b448bdf73b4a967fdb7e81) --- net/core/filter.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 34320ce70096a..5881944f1681c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11190,6 +11190,7 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY; struct sock_reuseport *reuse; struct sock *selected_sk; + int err; selected_sk = map->ops->map_lookup_elem(map, key); if (!selected_sk) @@ -11197,10 +11198,6 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, reuse = rcu_dereference(selected_sk->sk_reuseport_cb); if (!reuse) { - /* Lookup in sock_map can return TCP ESTABLISHED sockets. */ - if (sk_is_refcounted(selected_sk)) - sock_put(selected_sk); - /* reuseport_array has only sk with non NULL sk_reuseport_cb. * The only (!reuse) case here is - the sk has already been * unhashed (e.g. by close()), so treat it as -ENOENT. @@ -11208,24 +11205,33 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, * Other maps (e.g. sock_map) do not provide this guarantee and * the sk may never be in the reuseport group to begin with. */ - return is_sockarray ? -ENOENT : -EINVAL; + err = is_sockarray ? -ENOENT : -EINVAL; + goto error; } if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) { struct sock *sk = reuse_kern->sk; - if (sk->sk_protocol != selected_sk->sk_protocol) - return -EPROTOTYPE; - else if (sk->sk_family != selected_sk->sk_family) - return -EAFNOSUPPORT; - - /* Catch all. Likely bound to a different sockaddr. */ - return -EBADFD; + if (sk->sk_protocol != selected_sk->sk_protocol) { + err = -EPROTOTYPE; + } else if (sk->sk_family != selected_sk->sk_family) { + err = -EAFNOSUPPORT; + } else { + /* Catch all. Likely bound to a different sockaddr. */ + err = -EBADFD; + } + goto error; } reuse_kern->selected_sk = selected_sk; return 0; +error: + /* Lookup in sock_map can return TCP ESTABLISHED sockets. */ + if (sk_is_refcounted(selected_sk)) + sock_put(selected_sk); + + return err; } static const struct bpf_func_proto sk_select_reuseport_proto = { From 9cc7d3b846dafde8fc338207a6fa170877245bf2 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 9 Jan 2025 13:21:24 +0100 Subject: [PATCH 15/85] openvswitch: fix lockup on tx to unregistering netdev with carrier [ Upstream commit 47e55e4b410f7d552e43011baa5be1aab4093990 ] Commit in a fixes tag attempted to fix the issue in the following sequence of calls: do_output -> ovs_vport_send -> dev_queue_xmit -> __dev_queue_xmit -> netdev_core_pick_tx -> skb_tx_hash When device is unregistering, the 'dev->real_num_tx_queues' goes to zero and the 'while (unlikely(hash >= qcount))' loop inside the 'skb_tx_hash' becomes infinite, locking up the core forever. But unfortunately, checking just the carrier status is not enough to fix the issue, because some devices may still be in unregistering state while reporting carrier status OK. One example of such device is a net/dummy. It sets carrier ON on start, but it doesn't implement .ndo_stop to set the carrier off. And it makes sense, because dummy doesn't really have a carrier. Therefore, while this device is unregistering, it's still easy to hit the infinite loop in the skb_tx_hash() from the OVS datapath. There might be other drivers that do the same, but dummy by itself is important for the OVS ecosystem, because it is frequently used as a packet sink for tcpdump while debugging OVS deployments. And when the issue is hit, the only way to recover is to reboot. Fix that by also checking if the device is running. The running state is handled by the net core during unregistering, so it covers unregistering case better, and we don't really need to send packets to devices that are not running anyway. While only checking the running state might be enough, the carrier check is preserved. The running and the carrier states seem disjoined throughout the code and different drivers. And other core functions like __dev_direct_xmit() check both before attempting to transmit a packet. So, it seems safer to check both flags in OVS as well. Fixes: 066b86787fa3 ("net: openvswitch: fix race on port output") Reported-by: Friedrich Weber Closes: https://mail.openvswitch.org/pipermail/ovs-discuss/2025-January/053423.html Signed-off-by: Ilya Maximets Tested-by: Friedrich Weber Reviewed-by: Aaron Conole Link: https://patch.msgid.link/20250109122225.4034688-1-i.maximets@ovn.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit 3a82fc742457ba2b7a29baba5c16b1ef2f5cd8f7) --- net/openvswitch/actions.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 4f5cbcaa38386..9445ca97163b4 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -918,7 +918,9 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, { struct vport *vport = ovs_vport_rcu(dp, out_port); - if (likely(vport && netif_carrier_ok(vport->dev))) { + if (likely(vport && + netif_running(vport->dev) && + netif_carrier_ok(vport->dev))) { u16 mru = OVS_CB(skb)->mru; u32 cutlen = OVS_CB(skb)->cutlen; From ac9d06c981ae78ecc84b12264f8f91b74ffa443d Mon Sep 17 00:00:00 2001 From: Artem Chernyshev Date: Thu, 9 Jan 2025 11:30:39 +0300 Subject: [PATCH 16/85] pktgen: Avoid out-of-bounds access in get_imix_entries [ Upstream commit 76201b5979768500bca362871db66d77cb4c225e ] Passing a sufficient amount of imix entries leads to invalid access to the pkt_dev->imix_entries array because of the incorrect boundary check. UBSAN: array-index-out-of-bounds in net/core/pktgen.c:874:24 index 20 is out of range for type 'imix_pkt [20]' CPU: 2 PID: 1210 Comm: bash Not tainted 6.10.0-rc1 #121 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) Call Trace: dump_stack_lvl lib/dump_stack.c:117 __ubsan_handle_out_of_bounds lib/ubsan.c:429 get_imix_entries net/core/pktgen.c:874 pktgen_if_write net/core/pktgen.c:1063 pde_write fs/proc/inode.c:334 proc_reg_write fs/proc/inode.c:346 vfs_write fs/read_write.c:593 ksys_write fs/read_write.c:644 do_syscall_64 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe arch/x86/entry/entry_64.S:130 Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 52a62f8603f9 ("pktgen: Parse internet mix (imix) input") Signed-off-by: Artem Chernyshev [ fp: allow to fill the array completely; minor changelog cleanup ] Signed-off-by: Fedor Pchelkin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin (cherry picked from commit 3ced6b39e3c6be6b9a25b857834c6565914b793c) --- net/core/pktgen.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 0e472f6fab853..359e24c3f22ca 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -850,6 +850,9 @@ static ssize_t get_imix_entries(const char __user *buffer, unsigned long weight; unsigned long size; + if (pkt_dev->n_imix_entries >= MAX_IMIX_ENTRIES) + return -E2BIG; + len = num_arg(&buffer[i], max_digits, &size); if (len < 0) return len; @@ -879,9 +882,6 @@ static ssize_t get_imix_entries(const char __user *buffer, i++; pkt_dev->n_imix_entries++; - - if (pkt_dev->n_imix_entries > MAX_IMIX_ENTRIES) - return -E2BIG; } while (c == ' '); return i; From 58c14e588a0b94ee8f7a64f0fdf78a2d8487b253 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:42:57 +0000 Subject: [PATCH 17/85] net: add exit_batch_rtnl() method [ Upstream commit fd4f101edbd9f99567ab2adb1f2169579ede7c13 ] Many (struct pernet_operations)->exit_batch() methods have to acquire rtnl. In presence of rtnl mutex pressure, this makes cleanup_net() very slow. This patch adds a new exit_batch_rtnl() method to reduce number of rtnl acquisitions from cleanup_net(). exit_batch_rtnl() handlers are called while rtnl is locked, and devices to be killed can be queued in a list provided as their second argument. A single unregister_netdevice_many() is called right before rtnl is released. exit_batch_rtnl() handlers are called before ->exit() and ->exit_batch() handlers. Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-2-edumazet@google.com Signed-off-by: Jakub Kicinski Stable-dep-of: 46841c7053e6 ("gtp: Use for_each_netdev_rcu() in gtp_genl_dump_pdp().") Signed-off-by: Sasha Levin (cherry picked from commit fddc755b3b916b54c96e72bf7d063ecc0e711990) --- include/net/net_namespace.h | 3 +++ net/core/net_namespace.c | 31 ++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 6f8868260c8fd..77c0c77e2e227 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -448,6 +448,9 @@ struct pernet_operations { void (*pre_exit)(struct net *net); void (*exit)(struct net *net); void (*exit_batch)(struct list_head *net_exit_list); + /* Following method is called with RTNL held. */ + void (*exit_batch_rtnl)(struct list_head *net_exit_list, + struct list_head *dev_kill_list); unsigned int *id; size_t size; }; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 92b7fea4d495c..70ac9d9bc8770 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -321,8 +321,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) { /* Must be called with pernet_ops_rwsem held */ const struct pernet_operations *ops, *saved_ops; - int error = 0; LIST_HEAD(net_exit_list); + LIST_HEAD(dev_kill_list); + int error = 0; refcount_set(&net->ns.count, 1); ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt"); @@ -360,6 +361,15 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) synchronize_rcu(); + ops = saved_ops; + rtnl_lock(); + list_for_each_entry_continue_reverse(ops, &pernet_list, list) { + if (ops->exit_batch_rtnl) + ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list); + } + unregister_netdevice_many(&dev_kill_list); + rtnl_unlock(); + ops = saved_ops; list_for_each_entry_continue_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list); @@ -588,6 +598,7 @@ static void cleanup_net(struct work_struct *work) struct net *net, *tmp, *last; struct llist_node *net_kill_list; LIST_HEAD(net_exit_list); + LIST_HEAD(dev_kill_list); /* Atomically snapshot the list of namespaces to cleanup */ net_kill_list = llist_del_all(&cleanup_list); @@ -628,6 +639,14 @@ static void cleanup_net(struct work_struct *work) */ synchronize_rcu(); + rtnl_lock(); + list_for_each_entry_reverse(ops, &pernet_list, list) { + if (ops->exit_batch_rtnl) + ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list); + } + unregister_netdevice_many(&dev_kill_list); + rtnl_unlock(); + /* Run all of the network namespace exit methods */ list_for_each_entry_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list); @@ -1170,7 +1189,17 @@ static void free_exit_list(struct pernet_operations *ops, struct list_head *net_ { ops_pre_exit_list(ops, net_exit_list); synchronize_rcu(); + + if (ops->exit_batch_rtnl) { + LIST_HEAD(dev_kill_list); + + rtnl_lock(); + ops->exit_batch_rtnl(net_exit_list, &dev_kill_list); + unregister_netdevice_many(&dev_kill_list); + rtnl_unlock(); + } ops_exit_list(ops, net_exit_list); + ops_free_list(ops, net_exit_list); } From fc0a7dd65e623e685f3db0a862dfdc0c91fac42b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:03 +0000 Subject: [PATCH 18/85] gtp: use exit_batch_rtnl() method [ Upstream commit 6eedda01b2bfdcf427b37759e053dc27232f3af1 ] exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair per netns and one unregister_netdevice_many() call per netns. Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-8-edumazet@google.com Signed-off-by: Jakub Kicinski Stable-dep-of: 46841c7053e6 ("gtp: Use for_each_netdev_rcu() in gtp_genl_dump_pdp().") Signed-off-by: Sasha Levin (cherry picked from commit 0db04db13c3a5e7b82d51bc98950f7cf977745f1) --- drivers/net/gtp.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 9dd8f66610ce6..60c950066ec5b 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1884,23 +1884,23 @@ static int __net_init gtp_net_init(struct net *net) return 0; } -static void __net_exit gtp_net_exit(struct net *net) +static void __net_exit gtp_net_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { - struct gtp_net *gn = net_generic(net, gtp_net_id); - struct gtp_dev *gtp; - LIST_HEAD(list); + struct net *net; - rtnl_lock(); - list_for_each_entry(gtp, &gn->gtp_dev_list, list) - gtp_dellink(gtp->dev, &list); + list_for_each_entry(net, net_list, exit_list) { + struct gtp_net *gn = net_generic(net, gtp_net_id); + struct gtp_dev *gtp; - unregister_netdevice_many(&list); - rtnl_unlock(); + list_for_each_entry(gtp, &gn->gtp_dev_list, list) + gtp_dellink(gtp->dev, dev_to_kill); + } } static struct pernet_operations gtp_net_ops = { .init = gtp_net_init, - .exit = gtp_net_exit, + .exit_batch_rtnl = gtp_net_exit_batch_rtnl, .id = >p_net_id, .size = sizeof(struct gtp_net), }; From 7206c15db7f4368d7ac5efa9d5fabb3a7d260abe Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 10 Jan 2025 10:47:52 +0900 Subject: [PATCH 19/85] gtp: Use for_each_netdev_rcu() in gtp_genl_dump_pdp(). [ Upstream commit 46841c7053e6d25fb33e0534ef023833bf03e382 ] gtp_newlink() links the gtp device to a list in dev_net(dev). However, even after the gtp device is moved to another netns, it stays on the list but should be invisible. Let's use for_each_netdev_rcu() for netdev traversal in gtp_genl_dump_pdp(). Note that gtp_dev_list is no longer used under RCU, so list helpers are converted to the non-RCU variant. Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Reported-by: Xiao Liang Closes: https://lore.kernel.org/netdev/CABAhCOQdBL6h9M2C+kd+bGivRJ9Q72JUxW+-gur0nub_=PmFPA@mail.gmail.com/ Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin (cherry picked from commit a568959c28fb7213bebe4d32b6f40e85943bdca2) --- drivers/net/gtp.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 60c950066ec5b..69b89483f1b50 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1096,7 +1096,7 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, } gn = net_generic(dev_net(dev), gtp_net_id); - list_add_rcu(>p->list, &gn->gtp_dev_list); + list_add(>p->list, &gn->gtp_dev_list); dev->priv_destructor = gtp_destructor; netdev_dbg(dev, "registered new GTP interface\n"); @@ -1122,7 +1122,7 @@ static void gtp_dellink(struct net_device *dev, struct list_head *head) hlist_for_each_entry_safe(pctx, next, >p->tid_hash[i], hlist_tid) pdp_context_delete(pctx); - list_del_rcu(>p->list); + list_del(>p->list); unregister_netdevice_queue(dev, head); } @@ -1690,16 +1690,19 @@ static int gtp_genl_dump_pdp(struct sk_buff *skb, struct gtp_dev *last_gtp = (struct gtp_dev *)cb->args[2], *gtp; int i, j, bucket = cb->args[0], skip = cb->args[1]; struct net *net = sock_net(skb->sk); + struct net_device *dev; struct pdp_ctx *pctx; - struct gtp_net *gn; - - gn = net_generic(net, gtp_net_id); if (cb->args[4]) return 0; rcu_read_lock(); - list_for_each_entry_rcu(gtp, &gn->gtp_dev_list, list) { + for_each_netdev_rcu(net, dev) { + if (dev->rtnl_link_ops != >p_link_ops) + continue; + + gtp = netdev_priv(dev); + if (last_gtp && last_gtp != gtp) continue; else @@ -1891,9 +1894,9 @@ static void __net_exit gtp_net_exit_batch_rtnl(struct list_head *net_list, list_for_each_entry(net, net_list, exit_list) { struct gtp_net *gn = net_generic(net, gtp_net_id); - struct gtp_dev *gtp; + struct gtp_dev *gtp, *gtp_next; - list_for_each_entry(gtp, &gn->gtp_dev_list, list) + list_for_each_entry_safe(gtp, gtp_next, &gn->gtp_dev_list, list) gtp_dellink(gtp->dev, dev_to_kill); } } From a23cbe8d444b34f862e91152dd2d60d3384e7dcf Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 10 Jan 2025 10:47:53 +0900 Subject: [PATCH 20/85] gtp: Destroy device along with udp socket's netns dismantle. [ Upstream commit eb28fd76c0a08a47b470677c6cef9dd1c60e92d1 ] gtp_newlink() links the device to a list in dev_net(dev) instead of src_net, where a udp tunnel socket is created. Even when src_net is removed, the device stays alive on dev_net(dev). Then, removing src_net triggers the splat below. [0] In this example, gtp0 is created in ns2, and the udp socket is created in ns1. ip netns add ns1 ip netns add ns2 ip -n ns1 link add netns ns2 name gtp0 type gtp role sgsn ip netns del ns1 Let's link the device to the socket's netns instead. Now, gtp_net_exit_batch_rtnl() needs another netdev iteration to remove all gtp devices in the netns. [0]: ref_tracker: net notrefcnt@000000003d6e7d05 has 1/2 users at sk_alloc (./include/net/net_namespace.h:345 net/core/sock.c:2236) inet_create (net/ipv4/af_inet.c:326 net/ipv4/af_inet.c:252) __sock_create (net/socket.c:1558) udp_sock_create4 (net/ipv4/udp_tunnel_core.c:18) gtp_create_sock (./include/net/udp_tunnel.h:59 drivers/net/gtp.c:1423) gtp_create_sockets (drivers/net/gtp.c:1447) gtp_newlink (drivers/net/gtp.c:1507) rtnl_newlink (net/core/rtnetlink.c:3786 net/core/rtnetlink.c:3897 net/core/rtnetlink.c:4012) rtnetlink_rcv_msg (net/core/rtnetlink.c:6922) netlink_rcv_skb (net/netlink/af_netlink.c:2542) netlink_unicast (net/netlink/af_netlink.c:1321 net/netlink/af_netlink.c:1347) netlink_sendmsg (net/netlink/af_netlink.c:1891) ____sys_sendmsg (net/socket.c:711 net/socket.c:726 net/socket.c:2583) ___sys_sendmsg (net/socket.c:2639) __sys_sendmsg (net/socket.c:2669) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) WARNING: CPU: 1 PID: 60 at lib/ref_tracker.c:179 ref_tracker_dir_exit (lib/ref_tracker.c:179) Modules linked in: CPU: 1 UID: 0 PID: 60 Comm: kworker/u16:2 Not tainted 6.13.0-rc5-00147-g4c1224501e9d #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Workqueue: netns cleanup_net RIP: 0010:ref_tracker_dir_exit (lib/ref_tracker.c:179) Code: 00 00 00 fc ff df 4d 8b 26 49 bd 00 01 00 00 00 00 ad de 4c 39 f5 0f 85 df 00 00 00 48 8b 74 24 08 48 89 df e8 a5 cc 12 02 90 <0f> 0b 90 48 8d 6b 44 be 04 00 00 00 48 89 ef e8 80 de 67 ff 48 89 RSP: 0018:ff11000009a07b60 EFLAGS: 00010286 RAX: 0000000000002bd3 RBX: ff1100000f4e1aa0 RCX: 1ffffffff0e40ac6 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff8423ee3c RBP: ff1100000f4e1af0 R08: 0000000000000001 R09: fffffbfff0e395ae R10: 0000000000000001 R11: 0000000000036001 R12: ff1100000f4e1af0 R13: dead000000000100 R14: ff1100000f4e1af0 R15: dffffc0000000000 FS: 0000000000000000(0000) GS:ff1100006ce80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f9b2464bd98 CR3: 0000000005286005 CR4: 0000000000771ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? __warn (kernel/panic.c:748) ? ref_tracker_dir_exit (lib/ref_tracker.c:179) ? report_bug (lib/bug.c:201 lib/bug.c:219) ? handle_bug (arch/x86/kernel/traps.c:285) ? exc_invalid_op (arch/x86/kernel/traps.c:309 (discriminator 1)) ? asm_exc_invalid_op (./arch/x86/include/asm/idtentry.h:621) ? _raw_spin_unlock_irqrestore (./arch/x86/include/asm/irqflags.h:42 ./arch/x86/include/asm/irqflags.h:97 ./arch/x86/include/asm/irqflags.h:155 ./include/linux/spinlock_api_smp.h:151 kernel/locking/spinlock.c:194) ? ref_tracker_dir_exit (lib/ref_tracker.c:179) ? __pfx_ref_tracker_dir_exit (lib/ref_tracker.c:158) ? kfree (mm/slub.c:4613 mm/slub.c:4761) net_free (net/core/net_namespace.c:476 net/core/net_namespace.c:467) cleanup_net (net/core/net_namespace.c:664 (discriminator 3)) process_one_work (kernel/workqueue.c:3229) worker_thread (kernel/workqueue.c:3304 kernel/workqueue.c:3391) kthread (kernel/kthread.c:389) ret_from_fork (arch/x86/kernel/process.c:147) ret_from_fork_asm (arch/x86/entry/entry_64.S:257) Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Reported-by: Xiao Liang Closes: https://lore.kernel.org/netdev/20250104125732.17335-1-shaw.leon@gmail.com/ Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin (cherry picked from commit ab1992964fd74ee0f9153b0b5b6bfdc0dd608853) --- drivers/net/gtp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 69b89483f1b50..47238c3ec82e7 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1095,7 +1095,7 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, goto out_encap; } - gn = net_generic(dev_net(dev), gtp_net_id); + gn = net_generic(src_net, gtp_net_id); list_add(>p->list, &gn->gtp_dev_list); dev->priv_destructor = gtp_destructor; @@ -1895,6 +1895,11 @@ static void __net_exit gtp_net_exit_batch_rtnl(struct list_head *net_list, list_for_each_entry(net, net_list, exit_list) { struct gtp_net *gn = net_generic(net, gtp_net_id); struct gtp_dev *gtp, *gtp_next; + struct net_device *dev; + + for_each_netdev(net, dev) + if (dev->rtnl_link_ops == >p_link_ops) + gtp_dellink(dev, dev_to_kill); list_for_each_entry_safe(gtp, gtp_next, &gn->gtp_dev_list, list) gtp_dellink(gtp->dev, dev_to_kill); From c722a48e5ee7840bacee46bd0ab5f479b0852dc6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 13 Jan 2025 09:18:39 +0300 Subject: [PATCH 21/85] nfp: bpf: prevent integer overflow in nfp_bpf_event_output() [ Upstream commit 16ebb6f5b6295c9688749862a39a4889c56227f8 ] The "sizeof(struct cmsg_bpf_event) + pkt_size + data_size" math could potentially have an integer wrapping bug on 32bit systems. Check for this and return an error. Fixes: 9816dd35ecec ("nfp: bpf: perf event output helpers support") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/6074805b-e78d-4b8a-bf05-e929b5377c28@stanley.mountain Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit 6fb280cace31024137fa79e5b2f2d64f4e352ddb) --- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 9d97cd281f18e..c03558adda91e 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -458,7 +458,8 @@ int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data, map_id_full = be64_to_cpu(cbe->map_ptr); map_id = map_id_full; - if (len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size) + if (size_add(pkt_size, data_size) > INT_MAX || + len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size) return -EINVAL; if (cbe->hdr.ver != NFP_CCM_ABI_VERSION) return -EINVAL; From 4a4a25b1910520dc35868ac950f37e1cdded1c4d Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Mon, 13 Jan 2025 11:30:00 -0500 Subject: [PATCH 22/85] net: xilinx: axienet: Fix IRQ coalescing packet count overflow [ Upstream commit c17ff476f53afb30f90bb3c2af77de069c81a622 ] If coalesce_count is greater than 255 it will not fit in the register and will overflow. This can be reproduced by running # ethtool -C ethX rx-frames 256 which will result in a timeout of 0us instead. Fix this by checking for invalid values and reporting an error. Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") Signed-off-by: Sean Anderson Reviewed-by: Shannon Nelson Reviewed-by: Radhey Shyam Pandey Link: https://patch.msgid.link/20250113163001.2335235-1-sean.anderson@linux.dev Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit c010a1f18020eea93dd9e63b0a66575f96839f4e) --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 9f779653ed622..02e11827440b5 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1571,6 +1571,12 @@ axienet_ethtools_set_coalesce(struct net_device *ndev, return -EFAULT; } + if (ecoalesce->rx_max_coalesced_frames > 255 || + ecoalesce->tx_max_coalesced_frames > 255) { + NL_SET_ERR_MSG(extack, "frames must be less than 256"); + return -EINVAL; + } + if (ecoalesce->rx_max_coalesced_frames) lp->coalesce_count_rx = ecoalesce->rx_max_coalesced_frames; if (ecoalesce->rx_coalesce_usecs) From dd1a385841fb1cd6537f7b3889d9902e15a7ee75 Mon Sep 17 00:00:00 2001 From: Kevin Groeneveld Date: Mon, 13 Jan 2025 10:48:45 -0500 Subject: [PATCH 23/85] net: fec: handle page_pool_dev_alloc_pages error [ Upstream commit 001ba0902046cb6c352494df610718c0763e77a5 ] The fec_enet_update_cbd function calls page_pool_dev_alloc_pages but did not handle the case when it returned NULL. There was a WARN_ON(!new_page) but it would still proceed to use the NULL pointer and then crash. This case does seem somewhat rare but when the system is under memory pressure it can happen. One case where I can duplicate this with some frequency is when writing over a smbd share to a SATA HDD attached to an imx6q. Setting /proc/sys/vm/min_free_kbytes to higher values also seems to solve the problem for my test case. But it still seems wrong that the fec driver ignores the memory allocation error and can crash. This commit handles the allocation error by dropping the current packet. Fixes: 95698ff6177b5 ("net: fec: using page pool to manage RX buffers") Signed-off-by: Kevin Groeneveld Reviewed-by: Jacob Keller Reviewed-by: Wei Fang Link: https://patch.msgid.link/20250113154846.1765414-1-kgroeneveld@lenbrook.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit 341f43053ff9bf50fd27b0b1eebad64659476b7d) --- drivers/net/ethernet/freescale/fec_main.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index e8d9a0eba4d6b..8f5cc1f233188 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1572,19 +1572,22 @@ static void fec_enet_tx(struct net_device *ndev, int budget) fec_enet_tx_queue(ndev, i, budget); } -static void fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq, +static int fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq, struct bufdesc *bdp, int index) { struct page *new_page; dma_addr_t phys_addr; new_page = page_pool_dev_alloc_pages(rxq->page_pool); - WARN_ON(!new_page); - rxq->rx_skb_info[index].page = new_page; + if (unlikely(!new_page)) + return -ENOMEM; + rxq->rx_skb_info[index].page = new_page; rxq->rx_skb_info[index].offset = FEC_ENET_XDP_HEADROOM; phys_addr = page_pool_get_dma_addr(new_page) + FEC_ENET_XDP_HEADROOM; bdp->cbd_bufaddr = cpu_to_fec32(phys_addr); + + return 0; } static u32 @@ -1679,6 +1682,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) int cpu = smp_processor_id(); struct xdp_buff xdp; struct page *page; + __fec32 cbd_bufaddr; u32 sub_len = 4; #if !defined(CONFIG_M5272) @@ -1743,12 +1747,17 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) index = fec_enet_get_bd_index(bdp, &rxq->bd); page = rxq->rx_skb_info[index].page; + cbd_bufaddr = bdp->cbd_bufaddr; + if (fec_enet_update_cbd(rxq, bdp, index)) { + ndev->stats.rx_dropped++; + goto rx_processing_done; + } + dma_sync_single_for_cpu(&fep->pdev->dev, - fec32_to_cpu(bdp->cbd_bufaddr), + fec32_to_cpu(cbd_bufaddr), pkt_len, DMA_FROM_DEVICE); prefetch(page_address(page)); - fec_enet_update_cbd(rxq, bdp, index); if (xdp_prog) { xdp_buff_clear_frags_flag(&xdp); From d0ad0c42f15bad6a04200a91b9327a303b3a88e5 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Wed, 15 Jan 2025 13:39:04 +0200 Subject: [PATCH 24/85] net/mlx5: Fix RDMA TX steering prio [ Upstream commit c08d3e62b2e73e14da318a1d20b52d0486a28ee0 ] User added steering rules at RDMA_TX were being added to the first prio, which is the counters prio. Fix that so that they are correctly added to the BYPASS_PRIO instead. Fixes: 24670b1a3166 ("net/mlx5: Add support for RDMA TX steering") Signed-off-by: Patrisious Haddad Reviewed-by: Mark Bloch Reviewed-by: Jacob Keller Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin (cherry picked from commit ecd06d66f86d4fe9e3f08ae16edef4cd953ff904) --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 474e63d02ba49..d2dc375f5e49c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2490,6 +2490,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, break; case MLX5_FLOW_NAMESPACE_RDMA_TX: root_ns = steering->rdma_tx_root_ns; + prio = RDMA_TX_BYPASS_PRIO; break; case MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS: root_ns = steering->rdma_rx_root_ns; From ca3e20a64cae6da718f00a755c707a7ae19c7a72 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 15 Jan 2025 13:39:07 +0200 Subject: [PATCH 25/85] net/mlx5: Clear port select structure when fail to create [ Upstream commit 5641e82cb55b4ecbc6366a499300917d2f3e6790 ] Clear the port select structure on error so no stale values left after definers are destroyed. That's because the mlx5_lag_destroy_definers() always try to destroy all lag definers in the tt_map, so in the flow below lag definers get double-destroyed and cause kernel crash: mlx5_lag_port_sel_create() mlx5_lag_create_definers() mlx5_lag_create_definer() <- Failed on tt 1 mlx5_lag_destroy_definers() <- definers[tt=0] gets destroyed mlx5_lag_port_sel_create() mlx5_lag_create_definers() mlx5_lag_create_definer() <- Failed on tt 0 mlx5_lag_destroy_definers() <- definers[tt=0] gets double-destroyed Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 Mem abort info: ESR = 0x0000000096000005 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x05: level 1 translation fault Data abort info: ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 user pgtable: 64k pages, 48-bit VAs, pgdp=0000000112ce2e00 [0000000000000008] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000 Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP Modules linked in: iptable_raw bonding ip_gre ip6_gre gre ip6_tunnel tunnel6 geneve ip6_udp_tunnel udp_tunnel ipip tunnel4 ip_tunnel rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) mlx5_ib(OE) ib_uverbs(OE) mlx5_fwctl(OE) fwctl(OE) mlx5_core(OE) mlxdevm(OE) ib_core(OE) mlxfw(OE) memtrack(OE) mlx_compat(OE) openvswitch nsh nf_conncount psample xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xfrm_user xfrm_algo xt_addrtype iptable_filter iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 br_netfilter bridge stp llc netconsole overlay efi_pstore sch_fq_codel zram ip_tables crct10dif_ce qemu_fw_cfg fuse ipv6 crc_ccitt [last unloaded: mlx_compat(OE)] CPU: 3 UID: 0 PID: 217 Comm: kworker/u53:2 Tainted: G OE 6.11.0+ #2 Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 Workqueue: mlx5_lag mlx5_do_bond_work [mlx5_core] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : mlx5_del_flow_rules+0x24/0x2c0 [mlx5_core] lr : mlx5_lag_destroy_definer+0x54/0x100 [mlx5_core] sp : ffff800085fafb00 x29: ffff800085fafb00 x28: ffff0000da0c8000 x27: 0000000000000000 x26: ffff0000da0c8000 x25: ffff0000da0c8000 x24: ffff0000da0c8000 x23: ffff0000c31f81a0 x22: 0400000000000000 x21: ffff0000da0c8000 x20: 0000000000000000 x19: 0000000000000001 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000ffff8b0c9350 x14: 0000000000000000 x13: ffff800081390d18 x12: ffff800081dc3cc0 x11: 0000000000000001 x10: 0000000000000b10 x9 : ffff80007ab7304c x8 : ffff0000d00711f0 x7 : 0000000000000004 x6 : 0000000000000190 x5 : ffff00027edb3010 x4 : 0000000000000000 x3 : 0000000000000000 x2 : ffff0000d39b8000 x1 : ffff0000d39b8000 x0 : 0400000000000000 Call trace: mlx5_del_flow_rules+0x24/0x2c0 [mlx5_core] mlx5_lag_destroy_definer+0x54/0x100 [mlx5_core] mlx5_lag_destroy_definers+0xa0/0x108 [mlx5_core] mlx5_lag_port_sel_create+0x2d4/0x6f8 [mlx5_core] mlx5_activate_lag+0x60c/0x6f8 [mlx5_core] mlx5_do_bond_work+0x284/0x5c8 [mlx5_core] process_one_work+0x170/0x3e0 worker_thread+0x2d8/0x3e0 kthread+0x11c/0x128 ret_from_fork+0x10/0x20 Code: a9025bf5 aa0003f6 a90363f7 f90023f9 (f9400400) ---[ end trace 0000000000000000 ]--- Fixes: dc48516ec7d3 ("net/mlx5: Lag, add support to create definers for LAG") Signed-off-by: Mark Zhang Reviewed-by: Leon Romanovsky Reviewed-by: Mark Bloch Reviewed-by: Jacob Keller Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin (cherry picked from commit 4a7f17cc5d50a7c29c76e8137385edde70db7fad) --- drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index 005661248c7e9..9faa9ef863a1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -540,7 +540,7 @@ int mlx5_lag_port_sel_create(struct mlx5_lag *ldev, set_tt_map(port_sel, hash_type); err = mlx5_lag_create_definers(ldev, hash_type, ports); if (err) - return err; + goto clear_port_sel; if (port_sel->tunnel) { err = mlx5_lag_create_inner_ttc_table(ldev); @@ -559,6 +559,8 @@ int mlx5_lag_port_sel_create(struct mlx5_lag *ldev, mlx5_destroy_ttc_table(port_sel->inner.ttc); destroy_definers: mlx5_lag_destroy_definers(ldev); +clear_port_sel: + memset(port_sel, 0, sizeof(*port_sel)); return err; } From c3fbb5cb89e0d4f39400c97f2cfec3c0ea442747 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 15 Jan 2025 13:39:08 +0200 Subject: [PATCH 26/85] net/mlx5e: Fix inversion dependency warning while enabling IPsec tunnel [ Upstream commit 2c3688090f8a1f085230aa839cc63e4a7b977df0 ] Attempt to enable IPsec packet offload in tunnel mode in debug kernel generates the following kernel panic, which is happening due to two issues: 1. In SA add section, the should be _bh() variant when marking SA mode. 2. There is not needed flush_workqueue in SA delete routine. It is not needed as at this stage as it is removed from SADB and the running work will be canceled later in SA free. ===================================================== WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected 6.12.0+ #4 Not tainted ----------------------------------------------------- charon/1337 [HC0[0]:SC0[4]:HE1:SE0] is trying to acquire: ffff88810f365020 (&xa->xa_lock#24){+.+.}-{3:3}, at: mlx5e_xfrm_del_state+0xca/0x1e0 [mlx5_core] and this task is already holding: ffff88813e0f0d48 (&x->lock){+.-.}-{3:3}, at: xfrm_state_delete+0x16/0x30 which would create a new lock dependency: (&x->lock){+.-.}-{3:3} -> (&xa->xa_lock#24){+.+.}-{3:3} but this new dependency connects a SOFTIRQ-irq-safe lock: (&x->lock){+.-.}-{3:3} ... which became SOFTIRQ-irq-safe at: lock_acquire+0x1be/0x520 _raw_spin_lock_bh+0x34/0x40 xfrm_timer_handler+0x91/0xd70 __hrtimer_run_queues+0x1dd/0xa60 hrtimer_run_softirq+0x146/0x2e0 handle_softirqs+0x266/0x860 irq_exit_rcu+0x115/0x1a0 sysvec_apic_timer_interrupt+0x6e/0x90 asm_sysvec_apic_timer_interrupt+0x16/0x20 default_idle+0x13/0x20 default_idle_call+0x67/0xa0 do_idle+0x2da/0x320 cpu_startup_entry+0x50/0x60 start_secondary+0x213/0x2a0 common_startup_64+0x129/0x138 to a SOFTIRQ-irq-unsafe lock: (&xa->xa_lock#24){+.+.}-{3:3} ... which became SOFTIRQ-irq-unsafe at: ... lock_acquire+0x1be/0x520 _raw_spin_lock+0x2c/0x40 xa_set_mark+0x70/0x110 mlx5e_xfrm_add_state+0xe48/0x2290 [mlx5_core] xfrm_dev_state_add+0x3bb/0xd70 xfrm_add_sa+0x2451/0x4a90 xfrm_user_rcv_msg+0x493/0x880 netlink_rcv_skb+0x12e/0x380 xfrm_netlink_rcv+0x6d/0x90 netlink_unicast+0x42f/0x740 netlink_sendmsg+0x745/0xbe0 __sock_sendmsg+0xc5/0x190 __sys_sendto+0x1fe/0x2c0 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x4b/0x53 other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&xa->xa_lock#24); local_irq_disable(); lock(&x->lock); lock(&xa->xa_lock#24); lock(&x->lock); *** DEADLOCK *** 2 locks held by charon/1337: #0: ffffffff87f8f858 (&net->xfrm.xfrm_cfg_mutex){+.+.}-{4:4}, at: xfrm_netlink_rcv+0x5e/0x90 #1: ffff88813e0f0d48 (&x->lock){+.-.}-{3:3}, at: xfrm_state_delete+0x16/0x30 the dependencies between SOFTIRQ-irq-safe lock and the holding lock: -> (&x->lock){+.-.}-{3:3} ops: 29 { HARDIRQ-ON-W at: lock_acquire+0x1be/0x520 _raw_spin_lock_bh+0x34/0x40 xfrm_alloc_spi+0xc0/0xe60 xfrm_alloc_userspi+0x5f6/0xbc0 xfrm_user_rcv_msg+0x493/0x880 netlink_rcv_skb+0x12e/0x380 xfrm_netlink_rcv+0x6d/0x90 netlink_unicast+0x42f/0x740 netlink_sendmsg+0x745/0xbe0 __sock_sendmsg+0xc5/0x190 __sys_sendto+0x1fe/0x2c0 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x4b/0x53 IN-SOFTIRQ-W at: lock_acquire+0x1be/0x520 _raw_spin_lock_bh+0x34/0x40 xfrm_timer_handler+0x91/0xd70 __hrtimer_run_queues+0x1dd/0xa60 hrtimer_run_softirq+0x146/0x2e0 handle_softirqs+0x266/0x860 irq_exit_rcu+0x115/0x1a0 sysvec_apic_timer_interrupt+0x6e/0x90 asm_sysvec_apic_timer_interrupt+0x16/0x20 default_idle+0x13/0x20 default_idle_call+0x67/0xa0 do_idle+0x2da/0x320 cpu_startup_entry+0x50/0x60 start_secondary+0x213/0x2a0 common_startup_64+0x129/0x138 INITIAL USE at: lock_acquire+0x1be/0x520 _raw_spin_lock_bh+0x34/0x40 xfrm_alloc_spi+0xc0/0xe60 xfrm_alloc_userspi+0x5f6/0xbc0 xfrm_user_rcv_msg+0x493/0x880 netlink_rcv_skb+0x12e/0x380 xfrm_netlink_rcv+0x6d/0x90 netlink_unicast+0x42f/0x740 netlink_sendmsg+0x745/0xbe0 __sock_sendmsg+0xc5/0x190 __sys_sendto+0x1fe/0x2c0 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x4b/0x53 } ... key at: [] __key.18+0x0/0x40 the dependencies between the lock to be acquired and SOFTIRQ-irq-unsafe lock: -> (&xa->xa_lock#24){+.+.}-{3:3} ops: 9 { HARDIRQ-ON-W at: lock_acquire+0x1be/0x520 _raw_spin_lock_bh+0x34/0x40 mlx5e_xfrm_add_state+0xc5b/0x2290 [mlx5_core] xfrm_dev_state_add+0x3bb/0xd70 xfrm_add_sa+0x2451/0x4a90 xfrm_user_rcv_msg+0x493/0x880 netlink_rcv_skb+0x12e/0x380 xfrm_netlink_rcv+0x6d/0x90 netlink_unicast+0x42f/0x740 netlink_sendmsg+0x745/0xbe0 __sock_sendmsg+0xc5/0x190 __sys_sendto+0x1fe/0x2c0 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x4b/0x53 SOFTIRQ-ON-W at: lock_acquire+0x1be/0x520 _raw_spin_lock+0x2c/0x40 xa_set_mark+0x70/0x110 mlx5e_xfrm_add_state+0xe48/0x2290 [mlx5_core] xfrm_dev_state_add+0x3bb/0xd70 xfrm_add_sa+0x2451/0x4a90 xfrm_user_rcv_msg+0x493/0x880 netlink_rcv_skb+0x12e/0x380 xfrm_netlink_rcv+0x6d/0x90 netlink_unicast+0x42f/0x740 netlink_sendmsg+0x745/0xbe0 __sock_sendmsg+0xc5/0x190 __sys_sendto+0x1fe/0x2c0 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x4b/0x53 INITIAL USE at: lock_acquire+0x1be/0x520 _raw_spin_lock_bh+0x34/0x40 mlx5e_xfrm_add_state+0xc5b/0x2290 [mlx5_core] xfrm_dev_state_add+0x3bb/0xd70 xfrm_add_sa+0x2451/0x4a90 xfrm_user_rcv_msg+0x493/0x880 netlink_rcv_skb+0x12e/0x380 xfrm_netlink_rcv+0x6d/0x90 netlink_unicast+0x42f/0x740 netlink_sendmsg+0x745/0xbe0 __sock_sendmsg+0xc5/0x190 __sys_sendto+0x1fe/0x2c0 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x4b/0x53 } ... key at: [] __key.48+0x0/0xfffffffffff210a0 [mlx5_core] ... acquired at: __lock_acquire+0x30a0/0x5040 lock_acquire+0x1be/0x520 _raw_spin_lock_bh+0x34/0x40 mlx5e_xfrm_del_state+0xca/0x1e0 [mlx5_core] xfrm_dev_state_delete+0x90/0x160 __xfrm_state_delete+0x662/0xae0 xfrm_state_delete+0x1e/0x30 xfrm_del_sa+0x1c2/0x340 xfrm_user_rcv_msg+0x493/0x880 netlink_rcv_skb+0x12e/0x380 xfrm_netlink_rcv+0x6d/0x90 netlink_unicast+0x42f/0x740 netlink_sendmsg+0x745/0xbe0 __sock_sendmsg+0xc5/0x190 __sys_sendto+0x1fe/0x2c0 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x4b/0x53 stack backtrace: CPU: 7 UID: 0 PID: 1337 Comm: charon Not tainted 6.12.0+ #4 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x74/0xd0 check_irq_usage+0x12e8/0x1d90 ? print_shortest_lock_dependencies_backwards+0x1b0/0x1b0 ? check_chain_key+0x1bb/0x4c0 ? __lockdep_reset_lock+0x180/0x180 ? check_path.constprop.0+0x24/0x50 ? mark_lock+0x108/0x2fb0 ? print_circular_bug+0x9b0/0x9b0 ? mark_lock+0x108/0x2fb0 ? print_usage_bug.part.0+0x670/0x670 ? check_prev_add+0x1c4/0x2310 check_prev_add+0x1c4/0x2310 __lock_acquire+0x30a0/0x5040 ? lockdep_set_lock_cmp_fn+0x190/0x190 ? lockdep_set_lock_cmp_fn+0x190/0x190 lock_acquire+0x1be/0x520 ? mlx5e_xfrm_del_state+0xca/0x1e0 [mlx5_core] ? lockdep_hardirqs_on_prepare+0x400/0x400 ? __xfrm_state_delete+0x5f0/0xae0 ? lock_downgrade+0x6b0/0x6b0 _raw_spin_lock_bh+0x34/0x40 ? mlx5e_xfrm_del_state+0xca/0x1e0 [mlx5_core] mlx5e_xfrm_del_state+0xca/0x1e0 [mlx5_core] xfrm_dev_state_delete+0x90/0x160 __xfrm_state_delete+0x662/0xae0 xfrm_state_delete+0x1e/0x30 xfrm_del_sa+0x1c2/0x340 ? xfrm_get_sa+0x250/0x250 ? check_chain_key+0x1bb/0x4c0 xfrm_user_rcv_msg+0x493/0x880 ? copy_sec_ctx+0x270/0x270 ? check_chain_key+0x1bb/0x4c0 ? lockdep_set_lock_cmp_fn+0x190/0x190 ? lockdep_set_lock_cmp_fn+0x190/0x190 netlink_rcv_skb+0x12e/0x380 ? copy_sec_ctx+0x270/0x270 ? netlink_ack+0xd90/0xd90 ? netlink_deliver_tap+0xcd/0xb60 xfrm_netlink_rcv+0x6d/0x90 netlink_unicast+0x42f/0x740 ? netlink_attachskb+0x730/0x730 ? lock_acquire+0x1be/0x520 netlink_sendmsg+0x745/0xbe0 ? netlink_unicast+0x740/0x740 ? __might_fault+0xbb/0x170 ? netlink_unicast+0x740/0x740 __sock_sendmsg+0xc5/0x190 ? fdget+0x163/0x1d0 __sys_sendto+0x1fe/0x2c0 ? __x64_sys_getpeername+0xb0/0xb0 ? do_user_addr_fault+0x856/0xe30 ? lock_acquire+0x1be/0x520 ? __task_pid_nr_ns+0x117/0x410 ? lock_downgrade+0x6b0/0x6b0 __x64_sys_sendto+0xdc/0x1b0 ? lockdep_hardirqs_on_prepare+0x284/0x400 do_syscall_64+0x6d/0x140 entry_SYSCALL_64_after_hwframe+0x4b/0x53 RIP: 0033:0x7f7d31291ba4 Code: 7d e8 89 4d d4 e8 4c 42 f7 ff 44 8b 4d d0 4c 8b 45 c8 89 c3 44 8b 55 d4 8b 7d e8 b8 2c 00 00 00 48 8b 55 d8 48 8b 75 e0 0f 05 <48> 3d 00 f0 ff ff 77 34 89 df 48 89 45 e8 e8 99 42 f7 ff 48 8b 45 RSP: 002b:00007f7d2ccd94f0 EFLAGS: 00000297 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f7d31291ba4 RDX: 0000000000000028 RSI: 00007f7d2ccd96a0 RDI: 000000000000000a RBP: 00007f7d2ccd9530 R08: 00007f7d2ccd9598 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000297 R12: 0000000000000028 R13: 00007f7d2ccd9598 R14: 00007f7d2ccd96a0 R15: 00000000000000e1 Fixes: 4c24272b4e2b ("net/mlx5e: Listen to ARP events to update IPsec L2 headers in tunnel mode") Signed-off-by: Leon Romanovsky Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin (cherry picked from commit bb61edaa7ff610e877672c3add012952a1f2bea3) --- .../ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 015faddabc8e0..9fc6dbc83d141 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -763,9 +763,12 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, MLX5_IPSEC_RESCHED); if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET && - x->props.mode == XFRM_MODE_TUNNEL) - xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id, - MLX5E_IPSEC_TUNNEL_SA); + x->props.mode == XFRM_MODE_TUNNEL) { + xa_lock_bh(&ipsec->sadb); + __xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id, + MLX5E_IPSEC_TUNNEL_SA); + xa_unlock_bh(&ipsec->sadb); + } out: x->xso.offload_handle = (unsigned long)sa_entry; @@ -792,7 +795,6 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, static void mlx5e_xfrm_del_state(struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); - struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; struct mlx5e_ipsec *ipsec = sa_entry->ipsec; struct mlx5e_ipsec_sa_entry *old; @@ -801,12 +803,6 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x) old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id); WARN_ON(old != sa_entry); - - if (attrs->mode == XFRM_MODE_TUNNEL && - attrs->type == XFRM_DEV_OFFLOAD_PACKET) - /* Make sure that no ARP requests are running in parallel */ - flush_workqueue(ipsec->wq); - } static void mlx5e_xfrm_free_state(struct xfrm_state *x) From dd3ddd547f0c06f08a394cb66b5b5b6fd84d0d01 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 15 Jan 2025 13:39:09 +0200 Subject: [PATCH 27/85] net/mlx5e: Rely on reqid in IPsec tunnel mode [ Upstream commit 25f23524dfa227959beb3b2c2c0f38e0222f4cfa ] All packet offloads SAs have reqid in it to make sure they have corresponding policy. While it is not strictly needed for transparent mode, it is extremely important in tunnel mode. In that mode, policy and SAs have different match criteria. Policy catches the whole subnet addresses, and SA catches the tunnel gateways addresses. The source address of such tunnel is not known during egress packet traversal in flow steering as it is added only after successful encryption. As reqid is required for packet offload and it is unique for every SA, we can safely rely on it only. The output below shows the configured egress policy and SA by strongswan: [leonro@vm ~]$ sudo ip x s src 192.169.101.2 dst 192.169.101.1 proto esp spi 0xc88b7652 reqid 1 mode tunnel replay-window 0 flag af-unspec esn aead rfc4106(gcm(aes)) 0xe406a01083986e14d116488549094710e9c57bc6 128 anti-replay esn context: seq-hi 0x0, seq 0x0, oseq-hi 0x0, oseq 0x0 replay_window 1, bitmap-length 1 00000000 crypto offload parameters: dev eth2 dir out mode packet [leonro@064 ~]$ sudo ip x p src 192.170.0.0/16 dst 192.170.0.0/16 dir out priority 383615 ptype main tmpl src 192.169.101.2 dst 192.169.101.1 proto esp spi 0xc88b7652 reqid 1 mode tunnel crypto offload parameters: dev eth2 mode packet Fixes: b3beba1fb404 ("net/mlx5e: Allow policies with reqid 0, to support IKE policy holes") Signed-off-by: Leon Romanovsky Reviewed-by: Jacob Keller Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin (cherry picked from commit 345de9405d6601a3dfcd9c2e02da67a30e92d790) --- .../ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 61288066830d9..2382c71289857 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -1442,23 +1442,21 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) goto err_alloc; } - if (attrs->family == AF_INET) - setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); - else - setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); - setup_fte_no_frags(spec); setup_fte_upper_proto_match(spec, &attrs->upspec); switch (attrs->type) { case XFRM_DEV_OFFLOAD_CRYPTO: + if (attrs->family == AF_INET) + setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); + else + setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); setup_fte_spi(spec, attrs->spi, false); setup_fte_esp(spec); setup_fte_reg_a(spec); break; case XFRM_DEV_OFFLOAD_PACKET: - if (attrs->reqid) - setup_fte_reg_c4(spec, attrs->reqid); + setup_fte_reg_c4(spec, attrs->reqid); err = setup_pkt_reformat(ipsec, attrs, &flow_act); if (err) goto err_pkt_reformat; From 4449551a09b594fe69f875435674a45110215323 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 15 Jan 2025 13:39:10 +0200 Subject: [PATCH 28/85] net/mlx5e: Always start IPsec sequence number from 1 [ Upstream commit 7f95b0247764acd739d949ff247db4b76138e55a ] According to RFC4303, section "3.3.3. Sequence Number Generation", the first packet sent using a given SA will contain a sequence number of 1. This is applicable to both ESN and non-ESN mode, which was not covered in commit mentioned in Fixes line. Fixes: 3d42c8cc67a8 ("net/mlx5e: Ensure that IPsec sequence packet number starts from 1") Signed-off-by: Leon Romanovsky Reviewed-by: Jacob Keller Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin (cherry picked from commit 8188414ea0a4c98d387854e9a07608b1c3540d30) --- .../net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 6 ++++++ .../mellanox/mlx5/core/en_accel/ipsec_offload.c | 11 ++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 9fc6dbc83d141..463c23ae0ad1e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -719,6 +719,12 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, /* check esn */ if (x->props.flags & XFRM_STATE_ESN) mlx5e_ipsec_update_esn_state(sa_entry); + else + /* According to RFC4303, section "3.3.3. Sequence Number Generation", + * the first packet sent using a given SA will contain a sequence + * number of 1. + */ + sa_entry->esn_state.esn = 1; mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index de83567aae791..940e350058d10 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -90,8 +90,9 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) EXPORT_SYMBOL_GPL(mlx5_ipsec_device_caps); static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn, - struct mlx5_accel_esp_xfrm_attrs *attrs) + struct mlx5e_ipsec_sa_entry *sa_entry) { + struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; void *aso_ctx; aso_ctx = MLX5_ADDR_OF(ipsec_obj, obj, ipsec_aso); @@ -119,8 +120,12 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn, * active. */ MLX5_SET(ipsec_obj, obj, aso_return_reg, MLX5_IPSEC_ASO_REG_C_4_5); - if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) + if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) { MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_INC_SN); + if (!attrs->replay_esn.trigger) + MLX5_SET(ipsec_aso, aso_ctx, mode_parameter, + sa_entry->esn_state.esn); + } if (attrs->lft.hard_packet_limit != XFRM_INF) { MLX5_SET(ipsec_aso, aso_ctx, remove_flow_pkt_cnt, @@ -173,7 +178,7 @@ static int mlx5_create_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry) res = &mdev->mlx5e_res.hw_objs; if (attrs->type == XFRM_DEV_OFFLOAD_PACKET) - mlx5e_ipsec_packet_setup(obj, res->pdn, attrs); + mlx5e_ipsec_packet_setup(obj, res->pdn, sa_entry); err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); if (!err) From bd5b289a866f4e50697e9268224c01f20144392d Mon Sep 17 00:00:00 2001 From: Ian Forbes Date: Fri, 10 Jan 2025 12:53:35 -0600 Subject: [PATCH 29/85] drm/vmwgfx: Add new keep_resv BO param [ Upstream commit b7d40627813799870e72729c6fc979a8a40d9ba6 ] Adds a new BO param that keeps the reservation locked after creation. This removes the need to re-reserve the BO after creation which is a waste of cycles. This also fixes a bug in vmw_prime_import_sg_table where the imported reservation is unlocked twice. Signed-off-by: Ian Forbes Fixes: b32233acceff ("drm/vmwgfx: Fix prime import/export") Reviewed-by: Zack Rusin Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20250110185335.15301-1-ian.forbes@broadcom.com Signed-off-by: Sasha Levin (cherry picked from commit f014e8c49c816c9cc298fc939153567f5f016c64) --- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 3 ++- drivers/gpu/drm/vmwgfx/vmwgfx_bo.h | 3 ++- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 7 ++----- drivers/gpu/drm/vmwgfx/vmwgfx_gem.c | 1 + drivers/gpu/drm/vmwgfx/vmwgfx_shader.c | 7 ++----- drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 5 ++--- 6 files changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index fdc34283eeb97..ec6ca264ce11f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -412,7 +412,8 @@ static int vmw_bo_init(struct vmw_private *dev_priv, if (params->pin) ttm_bo_pin(&vmw_bo->tbo); - ttm_bo_unreserve(&vmw_bo->tbo); + if (!params->keep_resv) + ttm_bo_unreserve(&vmw_bo->tbo); return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h index 156ea612fc2a4..a3ac61b991bf6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h @@ -53,8 +53,9 @@ struct vmw_bo_params { u32 domain; u32 busy_domain; enum ttm_bo_type bo_type; - size_t size; bool pin; + bool keep_resv; + size_t size; struct dma_resv *resv; struct sg_table *sg; }; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index bea576434e475..4655c266924fe 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -399,7 +399,8 @@ static int vmw_dummy_query_bo_create(struct vmw_private *dev_priv) .busy_domain = VMW_BO_DOMAIN_SYS, .bo_type = ttm_bo_type_kernel, .size = PAGE_SIZE, - .pin = true + .pin = true, + .keep_resv = true, }; /* @@ -411,10 +412,6 @@ static int vmw_dummy_query_bo_create(struct vmw_private *dev_priv) if (unlikely(ret != 0)) return ret; - ret = ttm_bo_reserve(&vbo->tbo, false, true, NULL); - BUG_ON(ret != 0); - vmw_bo_pin_reserved(vbo, true); - ret = ttm_bo_kmap(&vbo->tbo, 0, 1, &map); if (likely(ret == 0)) { result = ttm_kmap_obj_virtual(&map, &dummy); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c index d6bcaf078b1f4..0dc3dacc5beee 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c @@ -163,6 +163,7 @@ struct drm_gem_object *vmw_prime_import_sg_table(struct drm_device *dev, .bo_type = ttm_bo_type_sg, .size = attach->dmabuf->size, .pin = false, + .keep_resv = true, .resv = attach->dmabuf->resv, .sg = table, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c index a01ca3226d0af..7fb1c88bcc475 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c @@ -896,7 +896,8 @@ int vmw_compat_shader_add(struct vmw_private *dev_priv, .busy_domain = VMW_BO_DOMAIN_SYS, .bo_type = ttm_bo_type_device, .size = size, - .pin = true + .pin = true, + .keep_resv = true, }; if (!vmw_shader_id_ok(user_key, shader_type)) @@ -906,10 +907,6 @@ int vmw_compat_shader_add(struct vmw_private *dev_priv, if (unlikely(ret != 0)) goto out; - ret = ttm_bo_reserve(&buf->tbo, false, true, NULL); - if (unlikely(ret != 0)) - goto no_reserve; - /* Map and copy shader bytecode. */ ret = ttm_bo_kmap(&buf->tbo, 0, PFN_UP(size), &map); if (unlikely(ret != 0)) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c index fcb87d83760ef..75cf9e76df2ed 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c @@ -604,15 +604,14 @@ int vmw_bo_create_and_populate(struct vmw_private *dev_priv, .busy_domain = domain, .bo_type = ttm_bo_type_kernel, .size = bo_size, - .pin = true + .pin = true, + .keep_resv = true, }; ret = vmw_bo_create(dev_priv, &bo_params, &vbo); if (unlikely(ret != 0)) return ret; - ret = ttm_bo_reserve(&vbo->tbo, false, true, NULL); - BUG_ON(ret != 0); ret = vmw_ttm_populate(vbo->tbo.bdev, vbo->tbo.ttm, &ctx); if (likely(ret == 0)) { struct vmw_ttm_tt *vmw_tt = From 4c9399d8cc13a5b2fe15c01eb8296d7cbef20d24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 13 Jan 2025 12:47:40 -0300 Subject: [PATCH 30/85] drm/v3d: Ensure job pointer is set to NULL after job completion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e4b5ccd392b92300a2b341705cc4805681094e49 ] After a job completes, the corresponding pointer in the device must be set to NULL. Failing to do so triggers a warning when unloading the driver, as it appears the job is still active. To prevent this, assign the job pointer to NULL after completing the job, indicating the job has finished. Fixes: 14d1d1908696 ("drm/v3d: Remove the bad signaled() implementation.") Signed-off-by: Maíra Canal Reviewed-by: Jose Maria Casanova Crespo Link: https://patchwork.freedesktop.org/patch/msgid/20250113154741.67520-1-mcanal@igalia.com Signed-off-by: Sasha Levin (cherry picked from commit 13b6febefb041b71d07609ef4c952d1202321638) --- drivers/gpu/drm/v3d/v3d_irq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index e714d5318f309..76806039691a2 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -103,6 +103,7 @@ v3d_irq(int irq, void *arg) trace_v3d_bcl_irq(&v3d->drm, fence->seqno); dma_fence_signal(&fence->base); + v3d->bin_job = NULL; status = IRQ_HANDLED; } @@ -112,6 +113,7 @@ v3d_irq(int irq, void *arg) trace_v3d_rcl_irq(&v3d->drm, fence->seqno); dma_fence_signal(&fence->base); + v3d->render_job = NULL; status = IRQ_HANDLED; } @@ -121,6 +123,7 @@ v3d_irq(int irq, void *arg) trace_v3d_csd_irq(&v3d->drm, fence->seqno); dma_fence_signal(&fence->base); + v3d->csd_job = NULL; status = IRQ_HANDLED; } @@ -157,6 +160,7 @@ v3d_hub_irq(int irq, void *arg) trace_v3d_tfu_irq(&v3d->drm, fence->seqno); dma_fence_signal(&fence->base); + v3d->tfu_job = NULL; status = IRQ_HANDLED; } From 4ffb5b1b6bb96fe078fadaf23601a18dace9a67b Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Fri, 20 Dec 2024 15:35:07 +0530 Subject: [PATCH 31/85] soc: ti: pruss: Fix pruss APIs [ Upstream commit 202580b60229345dc2637099f10c8a8857c1fdc2 ] PRUSS APIs in pruss_driver.h produce lots of compilation errors when CONFIG_TI_PRUSS is not set. The errors and warnings, warning: returning 'void *' from a function with return type 'int' makes integer from pointer without a cast [-Wint-conversion] error: expected identifier or '(' before '{' token Fix these warnings and errors by fixing the return type of pruss APIs as well as removing the misplaced semicolon from pruss_cfg_xfr_enable() Fixes: 0211cc1e4fbb ("soc: ti: pruss: Add helper functions to set GPI mode, MII_RT_event and XFR") Signed-off-by: MD Danish Anwar Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/20241220100508.1554309-2-danishanwar@ti.com Signed-off-by: Nishanth Menon Signed-off-by: Sasha Levin (cherry picked from commit c113a703da8f9548efa566dcab9d268f83b33022) --- include/linux/pruss_driver.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/pruss_driver.h b/include/linux/pruss_driver.h index c9a31c567e85b..2e18fef1a2e10 100644 --- a/include/linux/pruss_driver.h +++ b/include/linux/pruss_driver.h @@ -144,32 +144,32 @@ static inline int pruss_release_mem_region(struct pruss *pruss, static inline int pruss_cfg_get_gpmux(struct pruss *pruss, enum pruss_pru_id pru_id, u8 *mux) { - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } static inline int pruss_cfg_set_gpmux(struct pruss *pruss, enum pruss_pru_id pru_id, u8 mux) { - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } static inline int pruss_cfg_gpimode(struct pruss *pruss, enum pruss_pru_id pru_id, enum pruss_gpi_mode mode) { - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } static inline int pruss_cfg_miirt_enable(struct pruss *pruss, bool enable) { - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } static inline int pruss_cfg_xfr_enable(struct pruss *pruss, enum pru_type pru_type, - bool enable); + bool enable) { - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } #endif /* CONFIG_TI_PRUSS */ From 3217cb1f1dcccc6fd3eab0142f970e350885be5c Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 14 Jan 2025 15:45:52 -0600 Subject: [PATCH 32/85] hwmon: (tmp513) Fix division of negative numbers [ Upstream commit e2c68cea431d65292b592c9f8446c918d45fcf78 ] Fix several issues with division of negative numbers in the tmp513 driver. The docs on the DIV_ROUND_CLOSEST macro explain that dividing a negative value by an unsigned type is undefined behavior. The driver was doing this in several places, i.e. data->shunt_uohms has type of u32. The actual "undefined" behavior is that it converts both values to unsigned before doing the division, for example: int ret = DIV_ROUND_CLOSEST(-100, 3U); results in ret == 1431655732 instead of -33. Furthermore the MILLI macro has a type of unsigned long. Multiplying a signed long by an unsigned long results in an unsigned long. So, we need to cast both MILLI and data data->shunt_uohms to long when using the DIV_ROUND_CLOSEST macro. Fixes: f07f9d2467f4 ("hwmon: (tmp513) Use SI constants from units.h") Fixes: 59dfa75e5d82 ("hwmon: Add driver for Texas Instruments TMP512/513 sensor chips.") Signed-off-by: David Lechner Link: https://lore.kernel.org/r/20250114-fix-si-prefix-macro-sign-bugs-v1-1-696fd8d10f00@baylibre.com [groeck: Drop some continuation lines] Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin (cherry picked from commit 06f5c9bab6e85ca0609fe17f1d6cb229b77f0596) --- drivers/hwmon/tmp513.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/tmp513.c b/drivers/hwmon/tmp513.c index 070f93226ed69..62d31aadda4bb 100644 --- a/drivers/hwmon/tmp513.c +++ b/drivers/hwmon/tmp513.c @@ -203,7 +203,8 @@ static int tmp51x_get_value(struct tmp51x_data *data, u8 reg, u8 pos, *val = sign_extend32(regval, reg == TMP51X_SHUNT_CURRENT_RESULT ? 16 - tmp51x_get_pga_shift(data) : 15); - *val = DIV_ROUND_CLOSEST(*val * 10 * MILLI, data->shunt_uohms); + *val = DIV_ROUND_CLOSEST(*val * 10 * (long)MILLI, (long)data->shunt_uohms); + break; case TMP51X_BUS_VOLTAGE_RESULT: case TMP51X_BUS_VOLTAGE_H_LIMIT: @@ -219,7 +220,7 @@ static int tmp51x_get_value(struct tmp51x_data *data, u8 reg, u8 pos, case TMP51X_BUS_CURRENT_RESULT: // Current = (ShuntVoltage * CalibrationRegister) / 4096 *val = sign_extend32(regval, 15) * (long)data->curr_lsb_ua; - *val = DIV_ROUND_CLOSEST(*val, MILLI); + *val = DIV_ROUND_CLOSEST(*val, (long)MILLI); break; case TMP51X_LOCAL_TEMP_RESULT: case TMP51X_REMOTE_TEMP_RESULT_1: @@ -259,7 +260,7 @@ static int tmp51x_set_value(struct tmp51x_data *data, u8 reg, long val) * The user enter current value and we convert it to * voltage. 1lsb = 10uV */ - val = DIV_ROUND_CLOSEST(val * data->shunt_uohms, 10 * MILLI); + val = DIV_ROUND_CLOSEST(val * (long)data->shunt_uohms, 10 * (long)MILLI); max_val = U16_MAX >> tmp51x_get_pga_shift(data); regval = clamp_val(val, -max_val, max_val); break; From 0d47e5c08807b27723c89b3b1156b1b365ce3b92 Mon Sep 17 00:00:00 2001 From: Pratyush Yadav Date: Wed, 15 Jan 2025 13:41:56 +0000 Subject: [PATCH 33/85] Revert "mtd: spi-nor: core: replace dummy buswidth from addr to data" [ Upstream commit d15638bf76ad47874ecb5dc386f0945fc0b2a875 ] This reverts commit 98d1fb94ce75f39febd456d6d3cbbe58b6678795. The commit uses data nbits instead of addr nbits for dummy phase. This causes a regression for all boards where spi-tx-bus-width is smaller than spi-rx-bus-width. It is a common pattern for boards to have spi-tx-bus-width == 1 and spi-rx-bus-width > 1. The regression causes all reads with a dummy phase to become unavailable for such boards, leading to a usually slower 0-dummy-cycle read being selected. Most controllers' supports_op hooks call spi_mem_default_supports_op(). In spi_mem_default_supports_op(), spi_mem_check_buswidth() is called to check if the buswidths for the op can actually be supported by the board's wiring. This wiring information comes from (among other things) the spi-{tx,rx}-bus-width DT properties. Based on these properties, SPI_TX_* or SPI_RX_* flags are set by of_spi_parse_dt(). spi_mem_check_buswidth() then uses these flags to make the decision whether an op can be supported by the board's wiring (in a way, indirectly checking against spi-{rx,tx}-bus-width). Now the tricky bit here is that spi_mem_check_buswidth() does: if (op->dummy.nbytes && spi_check_buswidth_req(mem, op->dummy.buswidth, true)) return false; The true argument to spi_check_buswidth_req() means the op is treated as a TX op. For a board that has say 1-bit TX and 4-bit RX, a 4-bit dummy TX is considered as unsupported, and the op gets rejected. The commit being reverted uses the data buswidth for dummy buswidth. So for reads, the RX buswidth gets used for the dummy phase, uncovering this issue. In reality, a dummy phase is neither RX nor TX. As the name suggests, these are just dummy cycles that send or receive no data, and thus don't really need to have any buswidth at all. Ideally, dummy phases should not be checked against the board's wiring capabilities at all, and should only be sanity-checked for having a sane buswidth value. Since we are now at rc7 and such a change might introduce many unexpected bugs, revert the commit for now. It can be sent out later along with the spi_mem_check_buswidth() fix. Fixes: 98d1fb94ce75 ("mtd: spi-nor: core: replace dummy buswidth from addr to data") Reported-by: Alexander Stein Closes: https://lore.kernel.org/linux-mtd/3342163.44csPzL39Z@steina-w/ Tested-by: Alexander Stein Reviewed-by: Tudor Ambarus Signed-off-by: Pratyush Yadav Signed-off-by: Miquel Raynal Signed-off-by: Sasha Levin (cherry picked from commit 64f71ceb9b24c4364bb54496ec7350d40ba9ec5e) --- drivers/mtd/spi-nor/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index 6dbfde1c74783..0c67402f15b7d 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -91,7 +91,7 @@ void spi_nor_spimem_setup_op(const struct spi_nor *nor, op->addr.buswidth = spi_nor_get_protocol_addr_nbits(proto); if (op->dummy.nbytes) - op->dummy.buswidth = spi_nor_get_protocol_data_nbits(proto); + op->dummy.buswidth = spi_nor_get_protocol_addr_nbits(proto); if (op->data.nbytes) op->data.buswidth = spi_nor_get_protocol_data_nbits(proto); From 6e7b783873390f0a58a476709aa2b65a27b1863e Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 15 Jan 2025 08:29:45 +0100 Subject: [PATCH 34/85] i2c: mux: demux-pinctrl: check initial mux selection, too [ Upstream commit ca89f73394daf92779ddaa37b42956f4953f3941 ] When misconfigured, the initial setup of the current mux channel can fail, too. It must be checked as well. Fixes: 50a5ba876908 ("i2c: mux: demux-pinctrl: add driver") Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin (cherry picked from commit 03c958fb3e9987ed63888c584e8033b17a08078a) --- drivers/i2c/muxes/i2c-demux-pinctrl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c index 9f2e4aa281593..299abb6dd9423 100644 --- a/drivers/i2c/muxes/i2c-demux-pinctrl.c +++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c @@ -261,7 +261,9 @@ static int i2c_demux_pinctrl_probe(struct platform_device *pdev) pm_runtime_no_callbacks(&pdev->dev); /* switch to first parent as active master */ - i2c_demux_activate_master(priv, 0); + err = i2c_demux_activate_master(priv, 0); + if (err) + goto err_rollback; err = device_create_file(&pdev->dev, &dev_attr_available_masters); if (err) From e72ffcce6e66e0308157a07f900bccd0f431f435 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 15 Jan 2025 13:36:23 +0100 Subject: [PATCH 35/85] i2c: rcar: fix NACK handling when being a target [ Upstream commit 093f70c134f70e4632b295240f07d2b50b74e247 ] When this controller is a target, the NACK handling had two issues. First, the return value from the backend was not checked on the initial WRITE_REQUESTED. So, the driver missed to send a NACK in this case. Also, the NACK always arrives one byte late on the bus, even in the WRITE_RECEIVED case. This seems to be a HW issue. We should then not rely on the backend to correctly NACK the superfluous byte as well. Fix both issues by introducing a flag which gets set whenever the backend requests a NACK and keep sending it until we get a STOP condition. Fixes: de20d1857dd6 ("i2c: rcar: add slave support") Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin (cherry picked from commit 0558c4e0fd6e49cdd0b16d52fcf47cfab88e04bf) --- drivers/i2c/busses/i2c-rcar.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 84fdd3f5cc844..610df67cedaad 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -110,6 +110,8 @@ #define ID_P_PM_BLOCKED BIT(31) #define ID_P_MASK GENMASK(31, 28) +#define ID_SLAVE_NACK BIT(0) + enum rcar_i2c_type { I2C_RCAR_GEN1, I2C_RCAR_GEN2, @@ -143,6 +145,7 @@ struct rcar_i2c_priv { int irq; struct i2c_client *host_notify_client; + u8 slave_flags; }; #define rcar_i2c_priv_to_dev(p) ((p)->adap.dev.parent) @@ -597,6 +600,7 @@ static bool rcar_i2c_slave_irq(struct rcar_i2c_priv *priv) { u32 ssr_raw, ssr_filtered; u8 value; + int ret; ssr_raw = rcar_i2c_read(priv, ICSSR) & 0xff; ssr_filtered = ssr_raw & rcar_i2c_read(priv, ICSIER); @@ -612,7 +616,10 @@ static bool rcar_i2c_slave_irq(struct rcar_i2c_priv *priv) rcar_i2c_write(priv, ICRXTX, value); rcar_i2c_write(priv, ICSIER, SDE | SSR | SAR); } else { - i2c_slave_event(priv->slave, I2C_SLAVE_WRITE_REQUESTED, &value); + ret = i2c_slave_event(priv->slave, I2C_SLAVE_WRITE_REQUESTED, &value); + if (ret) + priv->slave_flags |= ID_SLAVE_NACK; + rcar_i2c_read(priv, ICRXTX); /* dummy read */ rcar_i2c_write(priv, ICSIER, SDR | SSR | SAR); } @@ -625,18 +632,21 @@ static bool rcar_i2c_slave_irq(struct rcar_i2c_priv *priv) if (ssr_filtered & SSR) { i2c_slave_event(priv->slave, I2C_SLAVE_STOP, &value); rcar_i2c_write(priv, ICSCR, SIE | SDBS); /* clear our NACK */ + priv->slave_flags &= ~ID_SLAVE_NACK; rcar_i2c_write(priv, ICSIER, SAR); rcar_i2c_write(priv, ICSSR, ~SSR & 0xff); } /* master wants to write to us */ if (ssr_filtered & SDR) { - int ret; - value = rcar_i2c_read(priv, ICRXTX); ret = i2c_slave_event(priv->slave, I2C_SLAVE_WRITE_RECEIVED, &value); - /* Send NACK in case of error */ - rcar_i2c_write(priv, ICSCR, SIE | SDBS | (ret < 0 ? FNA : 0)); + if (ret) + priv->slave_flags |= ID_SLAVE_NACK; + + /* Send NACK in case of error, but it will come 1 byte late :( */ + rcar_i2c_write(priv, ICSCR, SIE | SDBS | + (priv->slave_flags & ID_SLAVE_NACK ? FNA : 0)); rcar_i2c_write(priv, ICSSR, ~SDR & 0xff); } From 71b18a0aedafed2e495e939b323a9c7a1696bbc1 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 14 Jan 2025 12:48:48 -0300 Subject: [PATCH 36/85] smb: client: fix double free of TCP_Server_Info::hostname [ Upstream commit fa2f9906a7b333ba757a7dbae0713d8a5396186e ] When shutting down the server in cifs_put_tcp_session(), cifsd thread might be reconnecting to multiple DFS targets before it realizes it should exit the loop, so @server->hostname can't be freed as long as cifsd thread isn't done. Otherwise the following can happen: RIP: 0010:__slab_free+0x223/0x3c0 Code: 5e 41 5f c3 cc cc cc cc 4c 89 de 4c 89 cf 44 89 44 24 08 4c 89 1c 24 e8 fb cf 8e 00 44 8b 44 24 08 4c 8b 1c 24 e9 5f fe ff ff <0f> 0b 41 f7 45 08 00 0d 21 00 0f 85 2d ff ff ff e9 1f ff ff ff 80 RSP: 0018:ffffb26180dbfd08 EFLAGS: 00010246 RAX: ffff8ea34728e510 RBX: ffff8ea34728e500 RCX: 0000000000800068 RDX: 0000000000800068 RSI: 0000000000000000 RDI: ffff8ea340042400 RBP: ffffe112041ca380 R08: 0000000000000001 R09: 0000000000000000 R10: 6170732e31303000 R11: 70726f632e786563 R12: ffff8ea34728e500 R13: ffff8ea340042400 R14: ffff8ea34728e500 R15: 0000000000800068 FS: 0000000000000000(0000) GS:ffff8ea66fd80000(0000) 000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffc25376080 CR3: 000000012a2ba001 CR4: PKRU: 55555554 Call Trace: ? show_trace_log_lvl+0x1c4/0x2df ? show_trace_log_lvl+0x1c4/0x2df ? __reconnect_target_unlocked+0x3e/0x160 [cifs] ? __die_body.cold+0x8/0xd ? die+0x2b/0x50 ? do_trap+0xce/0x120 ? __slab_free+0x223/0x3c0 ? do_error_trap+0x65/0x80 ? __slab_free+0x223/0x3c0 ? exc_invalid_op+0x4e/0x70 ? __slab_free+0x223/0x3c0 ? asm_exc_invalid_op+0x16/0x20 ? __slab_free+0x223/0x3c0 ? extract_hostname+0x5c/0xa0 [cifs] ? extract_hostname+0x5c/0xa0 [cifs] ? __kmalloc+0x4b/0x140 __reconnect_target_unlocked+0x3e/0x160 [cifs] reconnect_dfs_server+0x145/0x430 [cifs] cifs_handle_standard+0x1ad/0x1d0 [cifs] cifs_demultiplex_thread+0x592/0x730 [cifs] ? __pfx_cifs_demultiplex_thread+0x10/0x10 [cifs] kthread+0xdd/0x100 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x29/0x50 Fixes: 7be3248f3139 ("cifs: To match file servers, make sure the server hostname matches") Reported-by: Jay Shin Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Sasha Levin (cherry picked from commit 61d3a85d493991d268084a5a7f587ce7242306cb) --- fs/smb/client/connect.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 20f303f2a5d75..dbcaaa274abdb 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -1061,6 +1061,7 @@ clean_demultiplex_info(struct TCP_Server_Info *server) /* Release netns reference for this server. */ put_net(cifs_net_ns(server)); kfree(server->leaf_fullpath); + kfree(server->hostname); kfree(server); length = atomic_dec_return(&tcpSesAllocCount); @@ -1684,8 +1685,6 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) kfree_sensitive(server->session_key.response); server->session_key.response = NULL; server->session_key.len = 0; - kfree(server->hostname); - server->hostname = NULL; task = xchg(&server->tsk, NULL); if (task) From a6b8dc690283f40a2385081a072bee5c1e11091b Mon Sep 17 00:00:00 2001 From: Lizhi Xu Date: Wed, 13 Nov 2024 17:51:29 +0800 Subject: [PATCH 37/85] mac802154: check local interfaces before deleting sdata list [ Upstream commit eb09fbeb48709fe66c0d708aed81e910a577a30a ] syzkaller reported a corrupted list in ieee802154_if_remove. [1] Remove an IEEE 802.15.4 network interface after unregister an IEEE 802.15.4 hardware device from the system. CPU0 CPU1 ==== ==== genl_family_rcv_msg_doit ieee802154_unregister_hw ieee802154_del_iface ieee802154_remove_interfaces rdev_del_virtual_intf_deprecated list_del(&sdata->list) ieee802154_if_remove list_del_rcu The net device has been unregistered, since the rcu grace period, unregistration must be run before ieee802154_if_remove. To avoid this issue, add a check for local->interfaces before deleting sdata list. [1] kernel BUG at lib/list_debug.c:58! Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI CPU: 0 UID: 0 PID: 6277 Comm: syz-executor157 Not tainted 6.12.0-rc6-syzkaller-00005-g557329bcecc2 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 RIP: 0010:__list_del_entry_valid_or_report+0xf4/0x140 lib/list_debug.c:56 Code: e8 a1 7e 00 07 90 0f 0b 48 c7 c7 e0 37 60 8c 4c 89 fe e8 8f 7e 00 07 90 0f 0b 48 c7 c7 40 38 60 8c 4c 89 fe e8 7d 7e 00 07 90 <0f> 0b 48 c7 c7 a0 38 60 8c 4c 89 fe e8 6b 7e 00 07 90 0f 0b 48 c7 RSP: 0018:ffffc9000490f3d0 EFLAGS: 00010246 RAX: 000000000000004e RBX: dead000000000122 RCX: d211eee56bb28d00 RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000 RBP: ffff88805b278dd8 R08: ffffffff8174a12c R09: 1ffffffff2852f0d R10: dffffc0000000000 R11: fffffbfff2852f0e R12: dffffc0000000000 R13: dffffc0000000000 R14: dead000000000100 R15: ffff88805b278cc0 FS: 0000555572f94380(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000056262e4a3000 CR3: 0000000078496000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __list_del_entry_valid include/linux/list.h:124 [inline] __list_del_entry include/linux/list.h:215 [inline] list_del_rcu include/linux/rculist.h:157 [inline] ieee802154_if_remove+0x86/0x1e0 net/mac802154/iface.c:687 rdev_del_virtual_intf_deprecated net/ieee802154/rdev-ops.h:24 [inline] ieee802154_del_iface+0x2c0/0x5c0 net/ieee802154/nl-phy.c:323 genl_family_rcv_msg_doit net/netlink/genetlink.c:1115 [inline] genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline] genl_rcv_msg+0xb14/0xec0 net/netlink/genetlink.c:1210 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2551 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219 netlink_unicast_kernel net/netlink/af_netlink.c:1331 [inline] netlink_unicast+0x7f6/0x990 net/netlink/af_netlink.c:1357 netlink_sendmsg+0x8e4/0xcb0 net/netlink/af_netlink.c:1901 sock_sendmsg_nosec net/socket.c:729 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:744 ____sys_sendmsg+0x52a/0x7e0 net/socket.c:2607 ___sys_sendmsg net/socket.c:2661 [inline] __sys_sendmsg+0x292/0x380 net/socket.c:2690 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Reported-and-tested-by: syzbot+985f827280dc3a6e7e92@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=985f827280dc3a6e7e92 Signed-off-by: Lizhi Xu Reviewed-by: Miquel Raynal Link: https://lore.kernel.org/20241113095129.1457225-1-lizhi.xu@windriver.com Signed-off-by: Stefan Schmidt Signed-off-by: Sasha Levin (cherry picked from commit 9b3f58ef3c5facf142583582fb5bc8e5f2f82242) --- net/mac802154/iface.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index c0e2da5072bea..9e4631fade90c 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -684,6 +684,10 @@ void ieee802154_if_remove(struct ieee802154_sub_if_data *sdata) ASSERT_RTNL(); mutex_lock(&sdata->local->iflist_mtx); + if (list_empty(&sdata->local->interfaces)) { + mutex_unlock(&sdata->local->iflist_mtx); + return; + } list_del_rcu(&sdata->list); mutex_unlock(&sdata->local->iflist_mtx); From 7f69e34ac738256a2ebc2fbdfcaf7e4424ce1595 Mon Sep 17 00:00:00 2001 From: Leo Stone Date: Sat, 30 Nov 2024 21:14:19 -0800 Subject: [PATCH 38/85] hfs: Sanity check the root record [ Upstream commit b905bafdea21a75d75a96855edd9e0b6051eee30 ] In the syzbot reproducer, the hfs_cat_rec for the root dir has type HFS_CDR_FIL after being read with hfs_bnode_read() in hfs_super_fill(). This indicates it should be used as an hfs_cat_file, which is 102 bytes. Only the first 70 bytes of that struct are initialized, however, because the entrylength passed into hfs_bnode_read() is still the length of a directory record. This causes uninitialized values to be used later on, when the hfs_cat_rec union is treated as the larger hfs_cat_file struct. Add a check to make sure the retrieved record has the correct type for the root directory (HFS_CDR_DIR), and make sure we load the correct number of bytes for a directory record. Reported-by: syzbot+2db3c7526ba68f4ea776@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=2db3c7526ba68f4ea776 Tested-by: syzbot+2db3c7526ba68f4ea776@syzkaller.appspotmail.com Tested-by: Leo Stone Signed-off-by: Leo Stone Link: https://lore.kernel.org/r/20241201051420.77858-1-leocstone@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin (cherry picked from commit 54297c2b8457d5d6285f7a79989c496afab3a55b) --- fs/hfs/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 6764afa98a6ff..431bdc65f7231 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -418,11 +418,13 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) goto bail_no_root; res = hfs_cat_find_brec(sb, HFS_ROOT_CNID, &fd); if (!res) { - if (fd.entrylength > sizeof(rec) || fd.entrylength < 0) { + if (fd.entrylength != sizeof(rec.dir)) { res = -EIO; goto bail_hfs_find; } hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, fd.entrylength); + if (rec.type != HFS_CDR_DIR) + res = -EIO; } if (res) goto bail_hfs_find; From 8e75fbadb0b57785ca6964c4ed32d49d27d7d0ca Mon Sep 17 00:00:00 2001 From: Zhang Kunbo Date: Tue, 17 Dec 2024 07:18:36 +0000 Subject: [PATCH 39/85] fs: fix missing declaration of init_files [ Upstream commit 2b2fc0be98a828cf33a88a28e9745e8599fb05cf ] fs/file.c should include include/linux/init_task.h for declaration of init_files. This fixes the sparse warning: fs/file.c:501:21: warning: symbol 'init_files' was not declared. Should it be static? Signed-off-by: Zhang Kunbo Link: https://lore.kernel.org/r/20241217071836.2634868-1-zhangkunbo@huawei.com Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin (cherry picked from commit 0f94002ac503a452cd88f42ce77229f4842fe553) --- fs/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/file.c b/fs/file.c index 225cd0ad50c31..9841f09940766 100644 --- a/fs/file.c +++ b/fs/file.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "internal.h" From f625dc2bf137d43a930c8412305619ebba1fff97 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 13 Dec 2024 13:50:01 +0000 Subject: [PATCH 40/85] kheaders: Ignore silly-rename files [ Upstream commit 973b710b8821c3401ad7a25360c89e94b26884ac ] Tell tar to ignore silly-rename files (".__afs*" and ".nfs*") when building the header archive. These occur when a file that is open is unlinked locally, but hasn't yet been closed. Such files are visible to the user via the getdents() syscall and so programs may want to do things with them. During the kernel build, such files may be made during the processing of header files and the cleanup may get deferred by fput() which may result in tar seeing these files when it reads the directory, but they may have disappeared by the time it tries to open them, causing tar to fail with an error. Further, we don't want to include them in the tarball if they still exist. With CONFIG_HEADERS_INSTALL=y, something like the following may be seen: find: './kernel/.tmp_cpio_dir/include/dt-bindings/reset/.__afs2080': No such file or directory tar: ./include/linux/greybus/.__afs3C95: File removed before we read it The find warning doesn't seem to cause a problem. Fix this by telling tar when called from in gen_kheaders.sh to exclude such files. This only affects afs and nfs; cifs uses the Windows Hidden attribute to prevent the file from being seen. Signed-off-by: David Howells Link: https://lore.kernel.org/r/20241213135013.2964079-2-dhowells@redhat.com cc: Masahiro Yamada cc: Marc Dionne cc: linux-afs@lists.infradead.org cc: linux-nfs@vger.kernel.org cc: linux-kernel@vger.kernel.org Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin (cherry picked from commit 15fa284f8d6435fe811dfa739d2b59ad2eb6b11a) --- kernel/gen_kheaders.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index 383fd43ac6122..7e1340da5acae 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -89,6 +89,7 @@ find $cpio_dir -type f -print0 | # Create archive and try to normalize metadata for reproducibility. tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \ + --exclude=".__afs*" --exclude=".nfs*" \ --owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \ -I $XZ -cf $tarfile -C $cpio_dir/ . > /dev/null From 65e954568c64d67fb530a9fd0620c5eff361af68 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Fri, 13 Dec 2024 13:50:05 +0000 Subject: [PATCH 41/85] cachefiles: Parse the "secctx" immediately [ Upstream commit e5a8b6446c0d370716f193771ccacf3260a57534 ] Instead of storing an opaque string, call security_secctx_to_secid() right in the "secctx" command handler and store only the numeric "secid". This eliminates an unnecessary string allocation and allows the daemon to receive errors when writing the "secctx" command instead of postponing the error to the "bind" command handler. For example, if the kernel was built without `CONFIG_SECURITY`, "bind" will return `EOPNOTSUPP`, but the daemon doesn't know why. With this patch, the "secctx" will instead return `EOPNOTSUPP` which is the right context for this error. This patch adds a boolean flag `have_secid` because I'm not sure if we can safely assume that zero is the special secid value for "not set". This appears to be true for SELinux, Smack and AppArmor, but since this attribute is not documented, I'm unable to derive a stable guarantee for that. Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/r/20241209141554.638708-1-max.kellermann@ionos.com/ Link: https://lore.kernel.org/r/20241213135013.2964079-6-dhowells@redhat.com Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin (cherry picked from commit 11f7c700de15166ed701c7afae9da297f7fac31a) --- fs/cachefiles/daemon.c | 14 +++++++------- fs/cachefiles/internal.h | 3 ++- fs/cachefiles/security.c | 6 +++--- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index 89b11336a8369..1806bff8e59bc 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -576,7 +577,7 @@ static int cachefiles_daemon_dir(struct cachefiles_cache *cache, char *args) */ static int cachefiles_daemon_secctx(struct cachefiles_cache *cache, char *args) { - char *secctx; + int err; _enter(",%s", args); @@ -585,16 +586,16 @@ static int cachefiles_daemon_secctx(struct cachefiles_cache *cache, char *args) return -EINVAL; } - if (cache->secctx) { + if (cache->have_secid) { pr_err("Second security context specified\n"); return -EINVAL; } - secctx = kstrdup(args, GFP_KERNEL); - if (!secctx) - return -ENOMEM; + err = security_secctx_to_secid(args, strlen(args), &cache->secid); + if (err) + return err; - cache->secctx = secctx; + cache->have_secid = true; return 0; } @@ -820,7 +821,6 @@ static void cachefiles_daemon_unbind(struct cachefiles_cache *cache) put_cred(cache->cache_cred); kfree(cache->rootdirname); - kfree(cache->secctx); kfree(cache->tag); _leave(""); diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 111ad6ecd4baf..4421a12960a66 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -122,7 +122,6 @@ struct cachefiles_cache { #define CACHEFILES_STATE_CHANGED 3 /* T if state changed (poll trigger) */ #define CACHEFILES_ONDEMAND_MODE 4 /* T if in on-demand read mode */ char *rootdirname; /* name of cache root directory */ - char *secctx; /* LSM security context */ char *tag; /* cache binding tag */ refcount_t unbind_pincount;/* refcount to do daemon unbind */ struct xarray reqs; /* xarray of pending on-demand requests */ @@ -130,6 +129,8 @@ struct cachefiles_cache { struct xarray ondemand_ids; /* xarray for ondemand_id allocation */ u32 ondemand_id_next; u32 msg_id_next; + u32 secid; /* LSM security id */ + bool have_secid; /* whether "secid" was set */ }; static inline bool cachefiles_in_ondemand_mode(struct cachefiles_cache *cache) diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c index fe777164f1d89..fc6611886b3b5 100644 --- a/fs/cachefiles/security.c +++ b/fs/cachefiles/security.c @@ -18,7 +18,7 @@ int cachefiles_get_security_ID(struct cachefiles_cache *cache) struct cred *new; int ret; - _enter("{%s}", cache->secctx); + _enter("{%u}", cache->have_secid ? cache->secid : 0); new = prepare_kernel_cred(current); if (!new) { @@ -26,8 +26,8 @@ int cachefiles_get_security_ID(struct cachefiles_cache *cache) goto error; } - if (cache->secctx) { - ret = set_security_override_from_ctx(new, cache->secctx); + if (cache->have_secid) { + ret = set_security_override(new, cache->secid); if (ret < 0) { put_cred(new); pr_err("Security denies permission to nominate security context: error %d\n", From 63f3e3405367003826e98621b1112bfd5af982ba Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 19 Dec 2024 22:20:42 +0530 Subject: [PATCH 42/85] scsi: ufs: core: Honor runtime/system PM levels if set by host controller drivers [ Upstream commit bb9850704c043e48c86cc9df90ee102e8a338229 ] Otherwise, the default levels will override the levels set by the host controller drivers. Signed-off-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20241219-ufs-qcom-suspend-fix-v3-2-63c4b95a70b9@linaro.org Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin (cherry picked from commit 1950db95950f691c5f409b5913e5dc1212cec01c) --- drivers/ufs/core/ufshcd.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 02696c7f9beff..0ac0b6aaf9c62 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -10483,14 +10483,17 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) } /* - * Set the default power management level for runtime and system PM. + * Set the default power management level for runtime and system PM if + * not set by the host controller drivers. * Default power saving mode is to keep UFS link in Hibern8 state * and UFS device in sleep state. */ - hba->rpm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state( + if (!hba->rpm_lvl) + hba->rpm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state( UFS_SLEEP_PWR_MODE, UIC_LINK_HIBERN8_STATE); - hba->spm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state( + if (!hba->spm_lvl) + hba->spm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state( UFS_SLEEP_PWR_MODE, UIC_LINK_HIBERN8_STATE); From 8d640b4f137e7f6b92b3ea87cd97aad32dee7bda Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Jan 2025 10:24:58 -0800 Subject: [PATCH 43/85] selftests: tc-testing: reduce rshift value [ Upstream commit e95274dfe86490ec2a5633035c24b2de6722841f ] After previous change rshift >= 32 is no longer allowed. Modify the test to use 31, the test doesn't seem to send any traffic so the exact value shouldn't matter. Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250103182458.1213486-1-kuba@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin (cherry picked from commit e986efbfd2340e006e9878bcf5420386257cb7d0) --- tools/testing/selftests/tc-testing/tc-tests/filters/flow.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json index 58189327f6444..383fbda07245c 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json @@ -78,10 +78,10 @@ "setup": [ "$TC qdisc add dev $DEV1 ingress" ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst rshift 0xff", + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst rshift 0x1f", "expExitCode": "0", "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow", - "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst rshift 255 baseclass", + "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst rshift 31 baseclass", "matchCount": "1", "teardown": [ "$TC qdisc del dev $DEV1 ingress" From a0d0e1009ab723f8254b3ec4d69fb64a0e72d713 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 28 Dec 2024 17:52:53 +0100 Subject: [PATCH 44/85] ACPI: resource: acpi_dev_irq_override(): Check DMI match last [ Upstream commit cd4a7b2e6a2437a5502910c08128ea3bad55a80b ] acpi_dev_irq_override() gets called approx. 30 times during boot (15 legacy IRQs * 2 override_table entries). Of these 30 calls at max 1 will match the non DMI checks done by acpi_dev_irq_override(). The dmi_check_system() check is by far the most expensive check done by acpi_dev_irq_override(), make this call the last check done by acpi_dev_irq_override() so that it will be called at max 1 time instead of 30 times. Signed-off-by: Hans de Goede Reviewed-by: Mario Limonciello Link: https://patch.msgid.link/20241228165253.42584-1-hdegoede@redhat.com [ rjw: Subject edit ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin (cherry picked from commit 8d5c9a05ff9f798f3d4f56c7fe5b95914add791b) --- drivers/acpi/resource.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index c82b255f82bc4..64d83ff3c0d90 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -680,11 +680,11 @@ static bool acpi_dev_irq_override(u32 gsi, u8 triggering, u8 polarity, for (i = 0; i < ARRAY_SIZE(override_table); i++) { const struct irq_override_cmp *entry = &override_table[i]; - if (dmi_check_system(entry->system) && - entry->irq == gsi && + if (entry->irq == gsi && entry->triggering == triggering && entry->polarity == polarity && - entry->shareable == shareable) + entry->shareable == shareable && + dmi_check_system(entry->system)) return entry->override; } From 54ebb7d783309809000d9f1395f476268ff4caa2 Mon Sep 17 00:00:00 2001 From: Marco Nelissen Date: Wed, 8 Jan 2025 20:11:50 -0800 Subject: [PATCH 45/85] iomap: avoid avoid truncating 64-bit offset to 32 bits [ Upstream commit c13094b894de289514d84b8db56d1f2931a0bade ] on 32-bit kernels, iomap_write_delalloc_scan() was inadvertently using a 32-bit position due to folio_next_index() returning an unsigned long. This could lead to an infinite loop when writing to an xfs filesystem. Signed-off-by: Marco Nelissen Link: https://lore.kernel.org/r/20250109041253.2494374-1-marco.nelissen@gmail.com Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin (cherry picked from commit ca7e2982c3fbc10e9778f55d55d4f9015e1cd94a) --- fs/iomap/buffered-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index a05ee2cbb7793..e7e6701806ad2 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1095,7 +1095,7 @@ static int iomap_write_delalloc_scan(struct inode *inode, } /* move offset to start of next folio in range */ - start_byte = folio_next_index(folio) << PAGE_SHIFT; + start_byte = folio_pos(folio) + folio_size(folio); folio_unlock(folio); folio_put(folio); } From 8b283c8df71fa1418d6d2101b46798ce37341182 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 7 Jan 2025 17:27:17 +0100 Subject: [PATCH 46/85] poll_wait: add mb() to fix theoretical race between waitqueue_active() and .poll() [ Upstream commit cacd9ae4bf801ff4125d8961bb9a3ba955e51680 ] As the comment above waitqueue_active() explains, it can only be used if both waker and waiter have mb()'s that pair with each other. However __pollwait() is broken in this respect. This is not pipe-specific, but let's look at pipe_poll() for example: poll_wait(...); // -> __pollwait() -> add_wait_queue() LOAD(pipe->head); LOAD(pipe->head); In theory these LOAD()'s can leak into the critical section inside add_wait_queue() and can happen before list_add(entry, wq_head), in this case pipe_poll() can race with wakeup_pipe_readers/writers which do smp_mb(); if (waitqueue_active(wq_head)) wake_up_interruptible(wq_head); There are more __pollwait()-like functions (grep init_poll_funcptr), and it seems that at least ep_ptable_queue_proc() has the same problem, so the patch adds smp_mb() into poll_wait(). Link: https://lore.kernel.org/all/20250102163320.GA17691@redhat.com/ Signed-off-by: Oleg Nesterov Link: https://lore.kernel.org/r/20250107162717.GA18922@redhat.com Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin (cherry picked from commit d2a8f847291d1a15ecccf7e17aaf26c972e61443) --- include/linux/poll.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/poll.h b/include/linux/poll.h index d1ea4f3714a84..fc641b50f1298 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -41,8 +41,16 @@ typedef struct poll_table_struct { static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p) { - if (p && p->_qproc && wait_address) + if (p && p->_qproc && wait_address) { p->_qproc(filp, wait_address, p); + /* + * This memory barrier is paired in the wq_has_sleeper(). + * See the comment above prepare_to_wait(), we need to + * ensure that subsequent tests in this thread can't be + * reordered with __add_wait_queue() in _qproc() paths. + */ + smp_mb(); + } } /* From 8c7cdcf56af13da79cd87adf656c591f3d95fad5 Mon Sep 17 00:00:00 2001 From: Hongguang Gao Date: Wed, 11 Dec 2024 14:09:30 +0530 Subject: [PATCH 47/85] RDMA/bnxt_re: Fix to export port num to ib_query_qp [ Upstream commit 34db8ec931b84d1426423f263b1927539e73b397 ] Current driver implementation doesn't populate the port_num field in query_qp. Adding the code to convert internal firmware port id to ibv defined port number and export it. Reviewed-by: Saravanan Vajravel Reviewed-by: Kalesh AP Signed-off-by: Hongguang Gao Signed-off-by: Selvin Xavier Link: https://patch.msgid.link/20241211083931.968831-5-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin (cherry picked from commit f1d2a394a7132195c78053e485b01375cce58c12) --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 1 + drivers/infiniband/hw/bnxt_re/ib_verbs.h | 4 ++++ drivers/infiniband/hw/bnxt_re/qplib_fp.c | 1 + drivers/infiniband/hw/bnxt_re/qplib_fp.h | 1 + 4 files changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 13c65ec582568..08da793969ee5 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -2220,6 +2220,7 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, qp_attr->retry_cnt = qplib_qp->retry_cnt; qp_attr->rnr_retry = qplib_qp->rnr_retry; qp_attr->min_rnr_timer = qplib_qp->min_rnr_timer; + qp_attr->port_num = __to_ib_port_num(qplib_qp->port_id); qp_attr->rq_psn = qplib_qp->rq.psn; qp_attr->max_rd_atomic = qplib_qp->max_rd_atomic; qp_attr->sq_psn = qplib_qp->sq.psn; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 98baea98fc176..ef910e6e2ccb7 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -245,6 +245,10 @@ void bnxt_re_dealloc_ucontext(struct ib_ucontext *context); int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry); +static inline u32 __to_ib_port_num(u16 port_id) +{ + return (u32)port_id + 1; +} unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp); void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, unsigned long flags); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 871a49315c880..c4f10498c79d8 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -1460,6 +1460,7 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) qp->dest_qpn = le32_to_cpu(sb->dest_qp_id); memcpy(qp->smac, sb->src_mac, 6); qp->vlan_id = le16_to_cpu(sb->vlan_pcp_vlan_dei_vlan_id); + qp->port_id = le16_to_cpu(sb->port_id); bail: dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index b5c53e864fbb3..55fd840359ef2 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -297,6 +297,7 @@ struct bnxt_qplib_qp { u32 dest_qpn; u8 smac[6]; u16 vlan_id; + u16 port_id; u8 nw_type; struct bnxt_qplib_ah ah; From e3ee4c7166ef0b7fd4063b23494c9631339ed3e4 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Tue, 17 Dec 2024 18:33:25 -0800 Subject: [PATCH 48/85] nvmet: propagate npwg topology [ Upstream commit b579d6fdc3a9149bb4d2b3133cc0767130ed13e6 ] Ensure we propagate npwg to the target as well instead of assuming its the same logical blocks per physical block. This ensures devices with large IUs information properly propagated on the target. Signed-off-by: Luis Chamberlain Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch Signed-off-by: Sasha Levin (cherry picked from commit 02a5ae2a19871418ecfeff544191ecd6cfb1a419) --- drivers/nvme/target/io-cmd-bdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 468833675cc94..c0b342cc93db3 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -36,7 +36,7 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) */ id->nsfeat |= 1 << 4; /* NPWG = Namespace Preferred Write Granularity. 0's based */ - id->npwg = lpp0b; + id->npwg = to0based(bdev_io_min(bdev) / bdev_logical_block_size(bdev)); /* NPWA = Namespace Preferred Write Alignment. 0's based */ id->npwa = id->npwg; /* NPDG = Namespace Preferred Deallocate Granularity. 0's based */ From 7d95230783707612aa2b88e84e67e30874f159fe Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 18 Dec 2024 11:09:18 +0100 Subject: [PATCH 49/85] x86/asm: Make serialize() always_inline [ Upstream commit ae02ae16b76160f0aeeae2c5fb9b15226d00a4ef ] In order to allow serialize() to be used from noinstr code, make it __always_inline. Fixes: 0ef8047b737d ("x86/static-call: provide a way to do very early static-call updates") Closes: https://lore.kernel.org/oe-kbuild-all/202412181756.aJvzih2K-lkp@intel.com/ Reported-by: kernel test robot Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20241218100918.22167-1-jgross@suse.com Signed-off-by: Sasha Levin (cherry picked from commit cd25bb3481c2a6afab0b1887635b844882910706) --- arch/x86/include/asm/special_insns.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 48f8dd47cf688..1c5513b04f038 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -217,7 +217,7 @@ static inline int write_user_shstk_64(u64 __user *addr, u64 val) #define nop() asm volatile ("nop") -static inline void serialize(void) +static __always_inline void serialize(void) { /* Instruction opcode for SERIALIZE; supported in binutils >= 2.35. */ asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory"); From 531401d3bd88eb3c0def0e32cdf90b2c9645ee55 Mon Sep 17 00:00:00 2001 From: Stefan Binding Date: Thu, 9 Jan 2025 16:54:48 +0000 Subject: [PATCH 50/85] ALSA: hda/realtek: Add support for Ayaneo System using CS35L41 HDA commit de5afaddd5a7af6b9c48900741b410ca03e453ae upstream. Add support for Ayaneo Portable Game System. System use 2 CS35L41 Amps with HDA, using Internal boost, with I2C Signed-off-by: Stefan Binding Cc: Link: https://patch.msgid.link/20250109165455.645810-1-sbinding@opensource.cirrus.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 46f7fa9c77d092ed6d38d9c9b3ffb9219d0f0f4b) --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fc93af80f0bff..739f8fd1792bd 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10430,6 +10430,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), + SND_PCI_QUIRK(0x1f66, 0x0105, "Ayaneo Portable Game Player", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x2782, 0x0214, "VAIO VJFE-CL", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x2782, 0x0228, "Infinix ZERO BOOK 13", ALC269VB_FIXUP_INFINIX_ZERO_BOOK_13), SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO), From 818655fafb5df57030926463da8ca0ac3e16f33d Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Tue, 7 Jan 2025 14:54:46 +0800 Subject: [PATCH 51/85] zram: fix potential UAF of zram table commit 212fe1c0df4a150fb6298db2cfff267ceaba5402 upstream. If zram_meta_alloc failed early, it frees allocated zram->table without setting it NULL. Which will potentially cause zram_meta_free to access the table if user reset an failed and uninitialized device. Link: https://lkml.kernel.org/r/20250107065446.86928-1-ryncsn@gmail.com Fixes: 74363ec674cb ("zram: fix uninitialized ZRAM not releasing backing device") Signed-off-by: Kairui Song Reviewed-by: Sergey Senozhatsky Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman (cherry picked from commit b6cbca38780619a9e8479f4da660f1565abf714c) --- drivers/block/zram/zram_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 1e257ecd624db..b73038ad86f7f 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1262,6 +1262,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) zram->mem_pool = zs_create_pool(zram->disk->disk_name); if (!zram->mem_pool) { vfree(zram->table); + zram->table = NULL; return false; } From 832d9b87c920c0aece5ce220a595dc549550458f Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 22 Nov 2024 14:26:18 +0200 Subject: [PATCH 52/85] i2c: atr: Fix client detach commit cefc479cbb50399dec0c8e996f3539c48a1ee9dd upstream. i2c-atr catches the BUS_NOTIFY_DEL_DEVICE event on the bus and removes the translation by calling i2c_atr_detach_client(). However, BUS_NOTIFY_DEL_DEVICE happens when the device is about to be removed from this bus, i.e. before removal, and thus before calling .remove() on the driver. If the driver happens to do any i2c transactions in its remove(), they will fail. Fix this by catching BUS_NOTIFY_REMOVED_DEVICE instead, thus removing the translation only after the device is actually removed. Fixes: a076a860acae ("media: i2c: add I2C Address Translator (ATR) support") Cc: stable@vger.kernel.org Signed-off-by: Tomi Valkeinen Reviewed-by: Luca Ceresoli Reviewed-by: Romain Gantois Tested-by: Romain Gantois Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman (cherry picked from commit f57f2201610f4f941f51a3ccdb1f333f20391560) --- drivers/i2c/i2c-atr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-atr.c b/drivers/i2c/i2c-atr.c index 8ca1daadec937..c03196da11635 100644 --- a/drivers/i2c/i2c-atr.c +++ b/drivers/i2c/i2c-atr.c @@ -412,7 +412,7 @@ static int i2c_atr_bus_notifier_call(struct notifier_block *nb, dev_name(dev), ret); break; - case BUS_NOTIFY_DEL_DEVICE: + case BUS_NOTIFY_REMOVED_DEVICE: i2c_atr_detach_client(client->adapter, client); break; From 073dcb8d01b323d7348aa35ade7cbe0b22b75b1b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 13 Jan 2025 16:44:56 +0100 Subject: [PATCH 53/85] mptcp: be sure to send ack when mptcp-level window re-opens commit 2ca06a2f65310aeef30bb69b7405437a14766e4d upstream. mptcp_cleanup_rbuf() is responsible to send acks when the user-space reads enough data to update the receive windows significantly. It tries hard to avoid acquiring the subflow sockets locks by checking conditions similar to the ones implemented at the TCP level. To avoid too much code duplication - the MPTCP protocol can't reuse the TCP helpers as part of the relevant status is maintained into the msk socket - and multiple costly window size computation, mptcp_cleanup_rbuf uses a rough estimate for the most recently advertised window size: the MPTCP receive free space, as recorded as at last-ack time. Unfortunately the above does not allow mptcp_cleanup_rbuf() to detect a zero to non-zero win change in some corner cases, skipping the tcp_cleanup_rbuf call and leaving the peer stuck. After commit ea66758c1795 ("tcp: allow MPTCP to update the announced window"), MPTCP has actually cheap access to the announced window value. Use it in mptcp_cleanup_rbuf() for a more accurate ack generation. Fixes: e3859603ba13 ("mptcp: better msk receive window updates") Cc: stable@vger.kernel.org Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/20250107131845.5e5de3c5@kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250113-net-mptcp-connect-st-flakes-v1-1-0d986ee7b1b6@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 1c3689c80c244da64b9acf77d495810d85d94f2f) --- net/mptcp/options.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 2e1539027e6d3..8e6a6dc6e0a40 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -607,7 +607,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, } opts->ext_copy.use_ack = 1; opts->suboptions = OPTION_MPTCP_DSS; - WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk)); /* Add kind/length/subtype/flag overhead if mapping is not populated */ if (dss_size == 0) @@ -1287,7 +1286,7 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th) } MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT); } - return; + goto update_wspace; } if (rcv_wnd_new != rcv_wnd_old) { @@ -1312,6 +1311,9 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th) th->window = htons(new_win); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDSHARED); } + +update_wspace: + WRITE_ONCE(msk->old_wspace, tp->rcv_wnd); } __sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum) From c197a86c71ab5915a17e78c0839ec5df853441c0 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 13 Jan 2025 16:44:57 +0100 Subject: [PATCH 54/85] mptcp: fix spurious wake-up on under memory pressure commit e226d9259dc4f5d2c19e6682ad1356fa97cf38f4 upstream. The wake-up condition currently implemented by mptcp_epollin_ready() is wrong, as it could mark the MPTCP socket as readable even when no data are present and the system is under memory pressure. Explicitly check for some data being available in the receive queue. Fixes: 5684ab1a0eff ("mptcp: give rcvlowat some love") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250113-net-mptcp-connect-st-flakes-v1-2-0d986ee7b1b6@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 7ced92f6a179dde61ebe2bfee27cf4500605b7e9) --- net/mptcp/protocol.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 89d1c299ff2b9..88c762de77287 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -685,10 +685,15 @@ static inline u64 mptcp_data_avail(const struct mptcp_sock *msk) static inline bool mptcp_epollin_ready(const struct sock *sk) { + u64 data_avail = mptcp_data_avail(mptcp_sk(sk)); + + if (!data_avail) + return false; + /* mptcp doesn't have to deal with small skbs in the receive queue, - * at it can always coalesce them + * as it can always coalesce them */ - return (mptcp_data_avail(mptcp_sk(sk)) >= sk->sk_rcvlowat) || + return (data_avail >= sk->sk_rcvlowat) || (mem_cgroup_sockets_enabled && sk->sk_memcg && mem_cgroup_under_socket_pressure(sk->sk_memcg)) || READ_ONCE(tcp_memory_pressure); From b64314cb6d919258ca63995863d4b6e2ff619124 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 13 Jan 2025 16:44:58 +0100 Subject: [PATCH 55/85] selftests: mptcp: avoid spurious errors on disconnect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 218cc166321fb3cc8786677ffe0d09a78778a910 upstream. The disconnect test-case generates spurious errors: INFO: disconnect INFO: extra options: -I 3 -i /tmp/tmp.r43niviyoI 01 ns1 MPTCP -> ns1 (10.0.1.1:10000 ) MPTCP (duration 140ms) [FAIL] file received by server does not match (in, out): Unexpected revents: POLLERR/POLLNVAL(19) -rw-r--r-- 1 root root 10028676 Jan 10 10:47 /tmp/tmp.r43niviyoI.disconnect Trailing bytes are: ��\����R���!8��u2��5N% -rw------- 1 root root 9992290 Jan 10 10:47 /tmp/tmp.Os4UbnWbI1 Trailing bytes are: ��\����R���!8��u2��5N% 02 ns1 MPTCP -> ns1 (dead:beef:1::1:10001) MPTCP (duration 206ms) [ OK ] 03 ns1 MPTCP -> ns1 (dead:beef:1::1:10002) TCP (duration 31ms) [ OK ] 04 ns1 TCP -> ns1 (dead:beef:1::1:10003) MPTCP (duration 26ms) [ OK ] [FAIL] Tests of the full disconnection have failed Time: 2 seconds The root cause is actually in the user-space bits: the test program currently disconnects as soon as all the pending data has been spooled, generating an FASTCLOSE. If such option reaches the peer before the latter has reached the closed status, the msk socket will report an error to the user-space, as per protocol specification, causing the above failure. Address the issue explicitly waiting for all the relevant sockets to reach a closed status before performing the disconnect. Fixes: 05be5e273c84 ("selftests: mptcp: add disconnect tests") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250113-net-mptcp-connect-st-flakes-v1-3-0d986ee7b1b6@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman (cherry picked from commit a1ec09e3412e4c7b37ab82f58a14d489213b2476) --- .../selftests/net/mptcp/mptcp_connect.c | 43 ++++++++++++++----- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 4209b95690394..414addef9a451 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -25,6 +25,8 @@ #include #include +#include + #include #include @@ -1211,23 +1213,42 @@ static void parse_setsock_options(const char *name) exit(1); } -void xdisconnect(int fd, int addrlen) +void xdisconnect(int fd) { - struct sockaddr_storage empty; + socklen_t addrlen = sizeof(struct sockaddr_storage); + struct sockaddr_storage addr, empty; int msec_sleep = 10; - int queued = 1; - int i; + void *raw_addr; + int i, cmdlen; + char cmd[128]; + + /* get the local address and convert it to string */ + if (getsockname(fd, (struct sockaddr *)&addr, &addrlen) < 0) + xerror("getsockname"); + + if (addr.ss_family == AF_INET) + raw_addr = &(((struct sockaddr_in *)&addr)->sin_addr); + else if (addr.ss_family == AF_INET6) + raw_addr = &(((struct sockaddr_in6 *)&addr)->sin6_addr); + else + xerror("bad family"); + + strcpy(cmd, "ss -M | grep -q "); + cmdlen = strlen(cmd); + if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen], + sizeof(cmd) - cmdlen)) + xerror("inet_ntop"); shutdown(fd, SHUT_WR); - /* while until the pending data is completely flushed, the later + /* + * wait until the pending data is completely flushed and all + * the MPTCP sockets reached the closed status. * disconnect will bypass/ignore/drop any pending data. */ for (i = 0; ; i += msec_sleep) { - if (ioctl(fd, SIOCOUTQ, &queued) < 0) - xerror("can't query out socket queue: %d", errno); - - if (!queued) + /* closed socket are not listed by 'ss' */ + if (system(cmd) != 0) break; if (i > poll_timeout) @@ -1281,9 +1302,9 @@ int main_loop(void) return ret; if (cfg_truncate > 0) { - xdisconnect(fd, peer->ai_addrlen); + xdisconnect(fd); } else if (--cfg_repeat > 0) { - xdisconnect(fd, peer->ai_addrlen); + xdisconnect(fd); /* the socket could be unblocking at this point, we need the * connect to be blocking From d0b7e61096a4a5f243e3148363bd655fed591933 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 12 Jan 2025 22:59:59 +0100 Subject: [PATCH 56/85] net: ethernet: xgbe: re-add aneg to supported features in PHY quirks commit 6be7aca91009865d8c2b73589270224a6b6e67ab upstream. In 4.19, before the switch to linkmode bitmaps, PHY_GBIT_FEATURES included feature bits for aneg and TP/MII ports. SUPPORTED_TP | \ SUPPORTED_MII) SUPPORTED_10baseT_Full) SUPPORTED_100baseT_Full) SUPPORTED_1000baseT_Full) PHY_100BT_FEATURES | \ PHY_DEFAULT_FEATURES) PHY_1000BT_FEATURES) Referenced commit expanded PHY_GBIT_FEATURES, silently removing PHY_DEFAULT_FEATURES. The removed part can be re-added by using the new PHY_GBIT_FEATURES definition. Not clear to me is why nobody seems to have noticed this issue. I stumbled across this when checking what it takes to make phy_10_100_features_array et al private to phylib. Fixes: d0939c26c53a ("net: ethernet: xgbe: expand PHY_GBIT_FEAUTRES") Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/46521973-7738-4157-9f5e-0bb6f694acba@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c3dde42d8308b124dc337e9d1fb8dd4ee7eaa875) --- drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 6a716337f48be..268399dfcf22f 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -923,7 +923,6 @@ static void xgbe_phy_free_phy_device(struct xgbe_prv_data *pdata) static bool xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, }; struct xgbe_phy_data *phy_data = pdata->phy_data; unsigned int phy_id = phy_data->phydev->phy_id; @@ -945,14 +944,7 @@ static bool xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata) phy_write(phy_data->phydev, 0x04, 0x0d01); phy_write(phy_data->phydev, 0x00, 0x9140); - linkmode_set_bit_array(phy_10_100_features_array, - ARRAY_SIZE(phy_10_100_features_array), - supported); - linkmode_set_bit_array(phy_gbit_features_array, - ARRAY_SIZE(phy_gbit_features_array), - supported); - - linkmode_copy(phy_data->phydev->supported, supported); + linkmode_copy(phy_data->phydev->supported, PHY_GBIT_FEATURES); phy_support_asym_pause(phy_data->phydev); @@ -964,7 +956,6 @@ static bool xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata) static bool xgbe_phy_belfuse_phy_quirks(struct xgbe_prv_data *pdata) { - __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, }; struct xgbe_phy_data *phy_data = pdata->phy_data; struct xgbe_sfp_eeprom *sfp_eeprom = &phy_data->sfp_eeprom; unsigned int phy_id = phy_data->phydev->phy_id; @@ -1028,13 +1019,7 @@ static bool xgbe_phy_belfuse_phy_quirks(struct xgbe_prv_data *pdata) reg = phy_read(phy_data->phydev, 0x00); phy_write(phy_data->phydev, 0x00, reg & ~0x00800); - linkmode_set_bit_array(phy_10_100_features_array, - ARRAY_SIZE(phy_10_100_features_array), - supported); - linkmode_set_bit_array(phy_gbit_features_array, - ARRAY_SIZE(phy_gbit_features_array), - supported); - linkmode_copy(phy_data->phydev->supported, supported); + linkmode_copy(phy_data->phydev->supported, PHY_GBIT_FEATURES); phy_support_asym_pause(phy_data->phydev); netif_dbg(pdata, drv, pdata->netdev, From 9e2f359dacc97ba58bfc76ff692566ee4eb05a9a Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 10 Jan 2025 09:35:08 +0100 Subject: [PATCH 57/85] vsock/bpf: return early if transport is not assigned commit f6abafcd32f9cfc4b1a2f820ecea70773e26d423 upstream. Some of the core functions can only be called if the transport has been assigned. As Michal reported, a socket might have the transport at NULL, for example after a failed connect(), causing the following trace: BUG: kernel NULL pointer dereference, address: 00000000000000a0 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 12faf8067 P4D 12faf8067 PUD 113670067 PMD 0 Oops: Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 15 UID: 0 PID: 1198 Comm: a.out Not tainted 6.13.0-rc2+ RIP: 0010:vsock_connectible_has_data+0x1f/0x40 Call Trace: vsock_bpf_recvmsg+0xca/0x5e0 sock_recvmsg+0xb9/0xc0 __sys_recvfrom+0xb3/0x130 __x64_sys_recvfrom+0x20/0x30 do_syscall_64+0x93/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e So we need to check the `vsk->transport` in vsock_bpf_recvmsg(), especially for connected sockets (stream/seqpacket) as we already do in __vsock_connectible_recvmsg(). Fixes: 634f1a7110b4 ("vsock: support sockmap") Cc: stable@vger.kernel.org Reported-by: Michal Luczaj Closes: https://lore.kernel.org/netdev/5ca20d4c-1017-49c2-9516-f6f75fd331e9@rbox.co/ Tested-by: Michal Luczaj Reported-by: syzbot+3affdbfc986ecd9200fd@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/677f84a8.050a0220.25a300.01b3.GAE@google.com/ Tested-by: syzbot+3affdbfc986ecd9200fd@syzkaller.appspotmail.com Reviewed-by: Hyunwoo Kim Acked-by: Michael S. Tsirkin Reviewed-by: Luigi Leonardi Signed-off-by: Stefano Garzarella Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 96b1ec6f49d7be3440a52627b4cd0da51b0f98ab) --- net/vmw_vsock/vsock_bpf.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c index 4aa6e74ec2957..f201d9eca1df2 100644 --- a/net/vmw_vsock/vsock_bpf.c +++ b/net/vmw_vsock/vsock_bpf.c @@ -77,6 +77,7 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { struct sk_psock *psock; + struct vsock_sock *vsk; int copied; psock = sk_psock_get(sk); @@ -84,6 +85,13 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, return __vsock_recvmsg(sk, msg, len, flags); lock_sock(sk); + vsk = vsock_sk(sk); + + if (!vsk->transport) { + copied = -ENODEV; + goto out; + } + if (vsock_has_data(sk, psock) && sk_psock_queue_empty(psock)) { release_sock(sk); sk_psock_put(sk, psock); @@ -108,6 +116,7 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, copied = sk_msg_recvmsg(sk, psock, msg, len, flags); } +out: release_sock(sk); sk_psock_put(sk, psock); From 18ad0af81c4267fc7bb7d817b46956e779937bc3 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 10 Jan 2025 09:35:07 +0100 Subject: [PATCH 58/85] vsock/virtio: discard packets if the transport changes commit 2cb7c756f605ec02ffe562fb26828e4bcc5fdfc1 upstream. If the socket has been de-assigned or assigned to another transport, we must discard any packets received because they are not expected and would cause issues when we access vsk->transport. A possible scenario is described by Hyunwoo Kim in the attached link, where after a first connect() interrupted by a signal, and a second connect() failed, we can find `vsk->transport` at NULL, leading to a NULL pointer dereference. Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") Cc: stable@vger.kernel.org Reported-by: Hyunwoo Kim Reported-by: Wongi Lee Closes: https://lore.kernel.org/netdev/Z2LvdTTQR7dBmPb5@v4bel-B760M-AORUS-ELITE-AX/ Signed-off-by: Stefano Garzarella Reviewed-by: Hyunwoo Kim Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 47bc95a3e0b178bc7ab8e1807fe816e2f4476eee) --- net/vmw_vsock/virtio_transport_common.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 43495820b64fb..d19c6493db243 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1428,8 +1428,11 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, lock_sock(sk); - /* Check if sk has been closed before lock_sock */ - if (sock_flag(sk, SOCK_DONE)) { + /* Check if sk has been closed or assigned to another transport before + * lock_sock (note: listener sockets are not assigned to any transport) + */ + if (sock_flag(sk, SOCK_DONE) || + (sk->sk_state != TCP_LISTEN && vsk->transport != &t->transport)) { (void)virtio_transport_reset_no_sock(t, skb); release_sock(sk); sock_put(sk); From 990fa0d860d8bed32394a1d49efe711d303489f5 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 10 Jan 2025 09:35:09 +0100 Subject: [PATCH 59/85] vsock/virtio: cancel close work in the destructor commit df137da9d6d166e87e40980e36eb8e0bc90483ef upstream. During virtio_transport_release() we can schedule a delayed work to perform the closing of the socket before destruction. The destructor is called either when the socket is really destroyed (reference counter to zero), or it can also be called when we are de-assigning the transport. In the former case, we are sure the delayed work has completed, because it holds a reference until it completes, so the destructor will definitely be called after the delayed work is finished. But in the latter case, the destructor is called by AF_VSOCK core, just after the release(), so there may still be delayed work scheduled. Refactor the code, moving the code to delete the close work already in the do_close() to a new function. Invoke it during destruction to make sure we don't leave any pending work. Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") Cc: stable@vger.kernel.org Reported-by: Hyunwoo Kim Closes: https://lore.kernel.org/netdev/Z37Sh+utS+iV3+eb@v4bel-B760M-AORUS-ELITE-AX/ Signed-off-by: Stefano Garzarella Reviewed-by: Luigi Leonardi Tested-by: Hyunwoo Kim Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman (cherry picked from commit b7aeebee0b6c27d6b815b0fd614a668e5ca92b08) --- net/vmw_vsock/virtio_transport_common.c | 29 ++++++++++++++++++------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index d19c6493db243..c57fe7ddcf73b 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -26,6 +26,9 @@ /* Threshold for detecting small packets to copy */ #define GOOD_COPY_LEN 128 +static void virtio_transport_cancel_close_work(struct vsock_sock *vsk, + bool cancel_timeout); + static const struct virtio_transport * virtio_transport_get_ops(struct vsock_sock *vsk) { @@ -922,6 +925,8 @@ void virtio_transport_destruct(struct vsock_sock *vsk) { struct virtio_vsock_sock *vvs = vsk->trans; + virtio_transport_cancel_close_work(vsk, true); + kfree(vvs); vsk->trans = NULL; } @@ -1004,17 +1009,11 @@ static void virtio_transport_wait_close(struct sock *sk, long timeout) } } -static void virtio_transport_do_close(struct vsock_sock *vsk, - bool cancel_timeout) +static void virtio_transport_cancel_close_work(struct vsock_sock *vsk, + bool cancel_timeout) { struct sock *sk = sk_vsock(vsk); - sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown = SHUTDOWN_MASK; - if (vsock_stream_has_data(vsk) <= 0) - sk->sk_state = TCP_CLOSING; - sk->sk_state_change(sk); - if (vsk->close_work_scheduled && (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) { vsk->close_work_scheduled = false; @@ -1026,6 +1025,20 @@ static void virtio_transport_do_close(struct vsock_sock *vsk, } } +static void virtio_transport_do_close(struct vsock_sock *vsk, + bool cancel_timeout) +{ + struct sock *sk = sk_vsock(vsk); + + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown = SHUTDOWN_MASK; + if (vsock_stream_has_data(vsk) <= 0) + sk->sk_state = TCP_CLOSING; + sk->sk_state_change(sk); + + virtio_transport_cancel_close_work(vsk, cancel_timeout); +} + static void virtio_transport_close_timeout(struct work_struct *work) { struct vsock_sock *vsk = From b3c2d9378d91145bac26f7a93c97d73dcc4de0bf Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 10 Jan 2025 09:35:10 +0100 Subject: [PATCH 60/85] vsock: reset socket state when de-assigning the transport commit a24009bc9be60242651a21702609381b5092459e upstream. Transport's release() and destruct() are called when de-assigning the vsock transport. These callbacks can touch some socket state like sock flags, sk_state, and peer_shutdown. Since we are reassigning the socket to a new transport during vsock_connect(), let's reset these fields to have a clean state with the new transport. Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") Cc: stable@vger.kernel.org Signed-off-by: Stefano Garzarella Reviewed-by: Luigi Leonardi Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman (cherry picked from commit d2a1836212c49e2ef3763e6358637b609ce02035) --- net/vmw_vsock/af_vsock.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 6e1cd71d33a59..9317f9bb68d20 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -490,6 +490,15 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) */ vsk->transport->release(vsk); vsock_deassign_transport(vsk); + + /* transport's release() and destruct() can touch some socket + * state, since we are reassigning the socket to a new transport + * during vsock_connect(), let's reset these fields to have a + * clean state. + */ + sock_reset_flag(sk, SOCK_DONE); + sk->sk_state = TCP_CLOSE; + vsk->peer_shutdown = 0; } /* We increase the module refcnt to prevent the transport unloading From 6a157b6243f34e30bfc948cb295a505d93170bfe Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 10 Jan 2025 09:35:11 +0100 Subject: [PATCH 61/85] vsock: prevent null-ptr-deref in vsock_*[has_data|has_space] commit 91751e248256efc111e52e15115840c35d85abaf upstream. Recent reports have shown how we sometimes call vsock_*_has_data() when a vsock socket has been de-assigned from a transport (see attached links), but we shouldn't. Previous commits should have solved the real problems, but we may have more in the future, so to avoid null-ptr-deref, we can return 0 (no space, no data available) but with a warning. This way the code should continue to run in a nearly consistent state and have a warning that allows us to debug future problems. Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/netdev/Z2K%2FI4nlHdfMRTZC@v4bel-B760M-AORUS-ELITE-AX/ Link: https://lore.kernel.org/netdev/5ca20d4c-1017-49c2-9516-f6f75fd331e9@rbox.co/ Link: https://lore.kernel.org/netdev/677f84a8.050a0220.25a300.01b3.GAE@google.com/ Co-developed-by: Hyunwoo Kim Signed-off-by: Hyunwoo Kim Co-developed-by: Wongi Lee Signed-off-by: Wongi Lee Signed-off-by: Stefano Garzarella Reviewed-by: Luigi Leonardi Reviewed-by: Hyunwoo Kim Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c216f66aa25b5f46491153ec3967d243b098f17a) --- net/vmw_vsock/af_vsock.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 9317f9bb68d20..2050d888df2ae 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -875,6 +875,9 @@ EXPORT_SYMBOL_GPL(vsock_create_connected); s64 vsock_stream_has_data(struct vsock_sock *vsk) { + if (WARN_ON(!vsk->transport)) + return 0; + return vsk->transport->stream_has_data(vsk); } EXPORT_SYMBOL_GPL(vsock_stream_has_data); @@ -883,6 +886,9 @@ s64 vsock_connectible_has_data(struct vsock_sock *vsk) { struct sock *sk = sk_vsock(vsk); + if (WARN_ON(!vsk->transport)) + return 0; + if (sk->sk_type == SOCK_SEQPACKET) return vsk->transport->seqpacket_has_data(vsk); else @@ -892,6 +898,9 @@ EXPORT_SYMBOL_GPL(vsock_connectible_has_data); s64 vsock_stream_has_space(struct vsock_sock *vsk) { + if (WARN_ON(!vsk->transport)) + return 0; + return vsk->transport->stream_has_space(vsk); } EXPORT_SYMBOL_GPL(vsock_stream_has_space); From 0bb2b5a9ca4f0a19a17f0e01b936d71458a4aa40 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 9 Jan 2025 10:55:53 +1000 Subject: [PATCH 62/85] nouveau/fence: handle cross device fences properly commit 1f9910b41c857a892b83801feebdc7bdf38c5985 upstream. The fence sync logic doesn't handle a fence sync across devices as it tries to write to a channel offset from one device into the fence bo from a different device, which won't work so well. This patch fixes that to avoid using the sync path in the case where the fences come from different nouveau drm devices. This works fine on a single device as the fence bo is shared across the devices, and mapped into each channels vma space, the channel offsets are therefore okay to pass between sides, so one channel can sync on the seqnos from the other by using the offset into it's vma. Signed-off-by: Dave Airlie Cc: stable@vger.kernel.org Reviewed-by: Ben Skeggs [ Fix compilation issue; remove version log from commit messsage. - Danilo ] Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20250109005553.623947-1-airlied@gmail.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 7ef28366be721b0ed790948004bbd16a2751b836) --- drivers/gpu/drm/nouveau/nouveau_fence.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 93f08f9479d89..03eacb22648ef 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -386,11 +386,13 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, if (f) { struct nouveau_channel *prev; bool must_wait = true; + bool local; rcu_read_lock(); prev = rcu_dereference(f->channel); - if (prev && (prev == chan || - fctx->sync(f, prev, chan) == 0)) + local = prev && prev->cli->drm == chan->cli->drm; + if (local && (prev == chan || + fctx->sync(f, prev, chan) == 0)) must_wait = false; rcu_read_unlock(); if (!must_wait) From 9945d2a39c19563ae008477ac797455e15cc2708 Mon Sep 17 00:00:00 2001 From: Marco Nelissen Date: Thu, 2 Jan 2025 11:04:11 -0800 Subject: [PATCH 63/85] filemap: avoid truncating 64-bit offset to 32 bits commit f505e6c91e7a22d10316665a86d79f84d9f0ba76 upstream. On 32-bit kernels, folio_seek_hole_data() was inadvertently truncating a 64-bit value to 32 bits, leading to a possible infinite loop when writing to an xfs filesystem. Link: https://lkml.kernel.org/r/20250102190540.1356838-1-marco.nelissen@gmail.com Fixes: 54fa39ac2e00 ("iomap: use mapping_seek_hole_data") Signed-off-by: Marco Nelissen Cc: Matthew Wilcox (Oracle) Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 3cc9ff4205883855a051a20b40cdda1e3ca6d6b5) --- mm/filemap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index 2c308413387ff..6a3d62de1cca7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3037,7 +3037,7 @@ static inline loff_t folio_seek_hole_data(struct xa_state *xas, if (ops->is_partially_uptodate(folio, offset, bsz) == seek_data) break; - start = (start + bsz) & ~(bsz - 1); + start = (start + bsz) & ~((u64)bsz - 1); offset += bsz; } while (offset < folio_size(folio)); unlock: From 44f60da769bdc08c75ee2c8bf9900e441a6febb3 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Fri, 10 Jan 2025 10:28:21 -0500 Subject: [PATCH 64/85] fs/proc: fix softlockup in __read_vmcore (part 2) commit cbc5dde0a461240046e8a41c43d7c3b76d5db952 upstream. Since commit 5cbcb62dddf5 ("fs/proc: fix softlockup in __read_vmcore") the number of softlockups in __read_vmcore at kdump time have gone down, but they still happen sometimes. In a memory constrained environment like the kdump image, a softlockup is not just a harmless message, but it can interfere with things like RCU freeing memory, causing the crashdump to get stuck. The second loop in __read_vmcore has a lot more opportunities for natural sleep points, like scheduling out while waiting for a data write to happen, but apparently that is not always enough. Add a cond_resched() to the second loop in __read_vmcore to (hopefully) get rid of the softlockups. Link: https://lkml.kernel.org/r/20250110102821.2a37581b@fangorn Fixes: 5cbcb62dddf5 ("fs/proc: fix softlockup in __read_vmcore") Signed-off-by: Rik van Riel Reported-by: Breno Leitao Cc: Baoquan He Cc: Dave Young Cc: Vivek Goyal Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman (cherry picked from commit e15d45678814799e89ad53b271fd1399c55a4f72) --- fs/proc/vmcore.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 8319bcbe3ee36..3303cb04e12c3 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -404,6 +404,8 @@ static ssize_t __read_vmcore(struct iov_iter *iter, loff_t *fpos) if (!iov_iter_count(iter)) return acc; } + + cond_resched(); } return acc; From 6ecef2005eeda5651df2ed3d96c5c46c0cfe5523 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 10 Jan 2025 11:33:54 -0500 Subject: [PATCH 65/85] gpio: xilinx: Convert gpio_lock to raw spinlock commit 9860370c2172704b6b4f0075a0c2a29fd84af96a upstream. irq_chip functions may be called in raw spinlock context. Therefore, we must also use a raw spinlock for our own internal locking. This fixes the following lockdep splat: [ 5.349336] ============================= [ 5.353349] [ BUG: Invalid wait context ] [ 5.357361] 6.13.0-rc5+ #69 Tainted: G W [ 5.363031] ----------------------------- [ 5.367045] kworker/u17:1/44 is trying to lock: [ 5.371587] ffffff88018b02c0 (&chip->gpio_lock){....}-{3:3}, at: xgpio_irq_unmask (drivers/gpio/gpio-xilinx.c:433 (discriminator 8)) [ 5.380079] other info that might help us debug this: [ 5.385138] context-{5:5} [ 5.387762] 5 locks held by kworker/u17:1/44: [ 5.392123] #0: ffffff8800014958 ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_one_work (kernel/workqueue.c:3204) [ 5.402260] #1: ffffffc082fcbdd8 (deferred_probe_work){+.+.}-{0:0}, at: process_one_work (kernel/workqueue.c:3205) [ 5.411528] #2: ffffff880172c900 (&dev->mutex){....}-{4:4}, at: __device_attach (drivers/base/dd.c:1006) [ 5.419929] #3: ffffff88039c8268 (request_class#2){+.+.}-{4:4}, at: __setup_irq (kernel/irq/internals.h:156 kernel/irq/manage.c:1596) [ 5.428331] #4: ffffff88039c80c8 (lock_class#2){....}-{2:2}, at: __setup_irq (kernel/irq/manage.c:1614) [ 5.436472] stack backtrace: [ 5.439359] CPU: 2 UID: 0 PID: 44 Comm: kworker/u17:1 Tainted: G W 6.13.0-rc5+ #69 [ 5.448690] Tainted: [W]=WARN [ 5.451656] Hardware name: xlnx,zynqmp (DT) [ 5.455845] Workqueue: events_unbound deferred_probe_work_func [ 5.461699] Call trace: [ 5.464147] show_stack+0x18/0x24 C [ 5.467821] dump_stack_lvl (lib/dump_stack.c:123) [ 5.471501] dump_stack (lib/dump_stack.c:130) [ 5.474824] __lock_acquire (kernel/locking/lockdep.c:4828 kernel/locking/lockdep.c:4898 kernel/locking/lockdep.c:5176) [ 5.478758] lock_acquire (arch/arm64/include/asm/percpu.h:40 kernel/locking/lockdep.c:467 kernel/locking/lockdep.c:5851 kernel/locking/lockdep.c:5814) [ 5.482429] _raw_spin_lock_irqsave (include/linux/spinlock_api_smp.h:111 kernel/locking/spinlock.c:162) [ 5.486797] xgpio_irq_unmask (drivers/gpio/gpio-xilinx.c:433 (discriminator 8)) [ 5.490737] irq_enable (kernel/irq/internals.h:236 kernel/irq/chip.c:170 kernel/irq/chip.c:439 kernel/irq/chip.c:432 kernel/irq/chip.c:345) [ 5.494060] __irq_startup (kernel/irq/internals.h:241 kernel/irq/chip.c:180 kernel/irq/chip.c:250) [ 5.497645] irq_startup (kernel/irq/chip.c:270) [ 5.501143] __setup_irq (kernel/irq/manage.c:1807) [ 5.504728] request_threaded_irq (kernel/irq/manage.c:2208) Fixes: a32c7caea292 ("gpio: gpio-xilinx: Add interrupt support") Signed-off-by: Sean Anderson Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250110163354.2012654-1-sean.anderson@linux.dev Signed-off-by: Bartosz Golaszewski Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 0b310f417457463c0e8953b4d8c029210be69d77) --- drivers/gpio/gpio-xilinx.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c index a16945e8319e3..956ea29578336 100644 --- a/drivers/gpio/gpio-xilinx.c +++ b/drivers/gpio/gpio-xilinx.c @@ -66,7 +66,7 @@ struct xgpio_instance { DECLARE_BITMAP(state, 64); DECLARE_BITMAP(last_irq_read, 64); DECLARE_BITMAP(dir, 64); - spinlock_t gpio_lock; /* For serializing operations */ + raw_spinlock_t gpio_lock; /* For serializing operations */ int irq; DECLARE_BITMAP(enable, 64); DECLARE_BITMAP(rising_edge, 64); @@ -180,14 +180,14 @@ static void xgpio_set(struct gpio_chip *gc, unsigned int gpio, int val) struct xgpio_instance *chip = gpiochip_get_data(gc); int bit = xgpio_to_bit(chip, gpio); - spin_lock_irqsave(&chip->gpio_lock, flags); + raw_spin_lock_irqsave(&chip->gpio_lock, flags); /* Write to GPIO signal and set its direction to output */ __assign_bit(bit, chip->state, val); xgpio_write_ch(chip, XGPIO_DATA_OFFSET, bit, chip->state); - spin_unlock_irqrestore(&chip->gpio_lock, flags); + raw_spin_unlock_irqrestore(&chip->gpio_lock, flags); } /** @@ -211,7 +211,7 @@ static void xgpio_set_multiple(struct gpio_chip *gc, unsigned long *mask, bitmap_remap(hw_mask, mask, chip->sw_map, chip->hw_map, 64); bitmap_remap(hw_bits, bits, chip->sw_map, chip->hw_map, 64); - spin_lock_irqsave(&chip->gpio_lock, flags); + raw_spin_lock_irqsave(&chip->gpio_lock, flags); bitmap_replace(state, chip->state, hw_bits, hw_mask, 64); @@ -219,7 +219,7 @@ static void xgpio_set_multiple(struct gpio_chip *gc, unsigned long *mask, bitmap_copy(chip->state, state, 64); - spin_unlock_irqrestore(&chip->gpio_lock, flags); + raw_spin_unlock_irqrestore(&chip->gpio_lock, flags); } /** @@ -237,13 +237,13 @@ static int xgpio_dir_in(struct gpio_chip *gc, unsigned int gpio) struct xgpio_instance *chip = gpiochip_get_data(gc); int bit = xgpio_to_bit(chip, gpio); - spin_lock_irqsave(&chip->gpio_lock, flags); + raw_spin_lock_irqsave(&chip->gpio_lock, flags); /* Set the GPIO bit in shadow register and set direction as input */ __set_bit(bit, chip->dir); xgpio_write_ch(chip, XGPIO_TRI_OFFSET, bit, chip->dir); - spin_unlock_irqrestore(&chip->gpio_lock, flags); + raw_spin_unlock_irqrestore(&chip->gpio_lock, flags); return 0; } @@ -266,7 +266,7 @@ static int xgpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) struct xgpio_instance *chip = gpiochip_get_data(gc); int bit = xgpio_to_bit(chip, gpio); - spin_lock_irqsave(&chip->gpio_lock, flags); + raw_spin_lock_irqsave(&chip->gpio_lock, flags); /* Write state of GPIO signal */ __assign_bit(bit, chip->state, val); @@ -276,7 +276,7 @@ static int xgpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) __clear_bit(bit, chip->dir); xgpio_write_ch(chip, XGPIO_TRI_OFFSET, bit, chip->dir); - spin_unlock_irqrestore(&chip->gpio_lock, flags); + raw_spin_unlock_irqrestore(&chip->gpio_lock, flags); return 0; } @@ -404,7 +404,7 @@ static void xgpio_irq_mask(struct irq_data *irq_data) int bit = xgpio_to_bit(chip, irq_offset); u32 mask = BIT(bit / 32), temp; - spin_lock_irqsave(&chip->gpio_lock, flags); + raw_spin_lock_irqsave(&chip->gpio_lock, flags); __clear_bit(bit, chip->enable); @@ -414,7 +414,7 @@ static void xgpio_irq_mask(struct irq_data *irq_data) temp &= ~mask; xgpio_writereg(chip->regs + XGPIO_IPIER_OFFSET, temp); } - spin_unlock_irqrestore(&chip->gpio_lock, flags); + raw_spin_unlock_irqrestore(&chip->gpio_lock, flags); gpiochip_disable_irq(&chip->gc, irq_offset); } @@ -434,7 +434,7 @@ static void xgpio_irq_unmask(struct irq_data *irq_data) gpiochip_enable_irq(&chip->gc, irq_offset); - spin_lock_irqsave(&chip->gpio_lock, flags); + raw_spin_lock_irqsave(&chip->gpio_lock, flags); __set_bit(bit, chip->enable); @@ -453,7 +453,7 @@ static void xgpio_irq_unmask(struct irq_data *irq_data) xgpio_writereg(chip->regs + XGPIO_IPIER_OFFSET, val); } - spin_unlock_irqrestore(&chip->gpio_lock, flags); + raw_spin_unlock_irqrestore(&chip->gpio_lock, flags); } /** @@ -518,7 +518,7 @@ static void xgpio_irqhandler(struct irq_desc *desc) chained_irq_enter(irqchip, desc); - spin_lock(&chip->gpio_lock); + raw_spin_lock(&chip->gpio_lock); xgpio_read_ch_all(chip, XGPIO_DATA_OFFSET, all); @@ -535,7 +535,7 @@ static void xgpio_irqhandler(struct irq_desc *desc) bitmap_copy(chip->last_irq_read, all, 64); bitmap_or(all, rising, falling, 64); - spin_unlock(&chip->gpio_lock); + raw_spin_unlock(&chip->gpio_lock); dev_dbg(gc->parent, "IRQ rising %*pb falling %*pb\n", 64, rising, 64, falling); @@ -626,7 +626,7 @@ static int xgpio_probe(struct platform_device *pdev) bitmap_set(chip->hw_map, 0, width[0]); bitmap_set(chip->hw_map, 32, width[1]); - spin_lock_init(&chip->gpio_lock); + raw_spin_lock_init(&chip->gpio_lock); chip->gc.base = -1; chip->gc.ngpio = bitmap_weight(chip->hw_map, 64); From 3a8bcd83ee2d2b8a2d3df402dc345daec6a54655 Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Wed, 15 Jan 2025 09:41:18 +0800 Subject: [PATCH 66/85] pmdomain: imx8mp-blk-ctrl: add missing loop break condition commit 726efa92e02b460811e8bc6990dd742f03b645ea upstream. Currently imx8mp_blk_ctrl_remove() will continue the for loop until an out-of-bounds exception occurs. pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : dev_pm_domain_detach+0x8/0x48 lr : imx8mp_blk_ctrl_shutdown+0x58/0x90 sp : ffffffc084f8bbf0 x29: ffffffc084f8bbf0 x28: ffffff80daf32ac0 x27: 0000000000000000 x26: ffffffc081658d78 x25: 0000000000000001 x24: ffffffc08201b028 x23: ffffff80d0db9490 x22: ffffffc082340a78 x21: 00000000000005b0 x20: ffffff80d19bc180 x19: 000000000000000a x18: ffffffffffffffff x17: ffffffc080a39e08 x16: ffffffc080a39c98 x15: 4f435f464f006c72 x14: 0000000000000004 x13: ffffff80d0172110 x12: 0000000000000000 x11: ffffff80d0537740 x10: ffffff80d05376c0 x9 : ffffffc0808ed2d8 x8 : ffffffc084f8bab0 x7 : 0000000000000000 x6 : 0000000000000000 x5 : ffffff80d19b9420 x4 : fffffffe03466e60 x3 : 0000000080800077 x2 : 0000000000000000 x1 : 0000000000000001 x0 : 0000000000000000 Call trace: dev_pm_domain_detach+0x8/0x48 platform_shutdown+0x2c/0x48 device_shutdown+0x158/0x268 kernel_restart_prepare+0x40/0x58 kernel_kexec+0x58/0xe8 __do_sys_reboot+0x198/0x258 __arm64_sys_reboot+0x2c/0x40 invoke_syscall+0x5c/0x138 el0_svc_common.constprop.0+0x48/0xf0 do_el0_svc+0x24/0x38 el0_svc+0x38/0xc8 el0t_64_sync_handler+0x120/0x130 el0t_64_sync+0x190/0x198 Code: 8128c2d0 ffffffc0 aa1e03e9 d503201f Fixes: 556f5cf9568a ("soc: imx: add i.MX8MP HSIO blk-ctrl") Cc: stable@vger.kernel.org Signed-off-by: Xiaolei Wang Reviewed-by: Lucas Stach Reviewed-by: Fabio Estevam Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20250115014118.4086729-1-xiaolei.wang@windriver.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 507e873ca5c8337052637ec7fcbee161826f368d) --- drivers/pmdomain/imx/imx8mp-blk-ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pmdomain/imx/imx8mp-blk-ctrl.c b/drivers/pmdomain/imx/imx8mp-blk-ctrl.c index 31693add7d633..faf643a4a5d06 100644 --- a/drivers/pmdomain/imx/imx8mp-blk-ctrl.c +++ b/drivers/pmdomain/imx/imx8mp-blk-ctrl.c @@ -767,7 +767,7 @@ static int imx8mp_blk_ctrl_remove(struct platform_device *pdev) of_genpd_del_provider(pdev->dev.of_node); - for (i = 0; bc->onecell_data.num_domains; i++) { + for (i = 0; i < bc->onecell_data.num_domains; i++) { struct imx8mp_blk_ctrl_domain *domain = &bc->domains[i]; pm_genpd_remove(&domain->genpd); From ec4c39ad355f741a3d4ba47e59ec36174fac685e Mon Sep 17 00:00:00 2001 From: Joe Hattori Date: Sun, 15 Dec 2024 12:39:45 +0900 Subject: [PATCH 67/85] irqchip: Plug a OF node reference leak in platform_irqchip_probe() commit 9322d1915f9d976ee48c09d800fbd5169bc2ddcc upstream. platform_irqchip_probe() leaks a OF node when irq_init_cb() fails. Fix it by declaring par_np with the __free(device_node) cleanup construct. This bug was found by an experimental static analysis tool that I am developing. Fixes: f8410e626569 ("irqchip: Add IRQCHIP_PLATFORM_DRIVER_BEGIN/END and IRQCHIP_MATCH helper macros") Signed-off-by: Joe Hattori Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20241215033945.3414223-1-joe@pf.is.s.u-tokyo.ac.jp Signed-off-by: Greg Kroah-Hartman (cherry picked from commit d010c33bdb76be2e61d7b33bf82f9da540d546f9) --- drivers/irqchip/irqchip.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/irqchip/irqchip.c b/drivers/irqchip/irqchip.c index 1eeb0d0156ce9..0ee7b6b71f5fa 100644 --- a/drivers/irqchip/irqchip.c +++ b/drivers/irqchip/irqchip.c @@ -35,11 +35,10 @@ void __init irqchip_init(void) int platform_irqchip_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; - struct device_node *par_np = of_irq_find_parent(np); + struct device_node *par_np __free(device_node) = of_irq_find_parent(np); of_irq_init_cb_t irq_init_cb = of_device_get_match_data(&pdev->dev); if (!irq_init_cb) { - of_node_put(par_np); return -EINVAL; } @@ -55,7 +54,6 @@ int platform_irqchip_probe(struct platform_device *pdev) * interrupt controller can check for specific domains as necessary. */ if (par_np && !irq_find_matching_host(par_np, DOMAIN_BUS_ANY)) { - of_node_put(par_np); return -EPROBE_DEFER; } From e7ad02572c92732077d9ead4361ee5b317be1b8c Mon Sep 17 00:00:00 2001 From: Yogesh Lal Date: Fri, 20 Dec 2024 15:09:07 +0530 Subject: [PATCH 68/85] irqchip/gic-v3: Handle CPU_PM_ENTER_FAILED correctly commit 0d62a49ab55c99e8deb4593b8d9f923de1ab5c18 upstream. When a CPU attempts to enter low power mode, it disables the redistributor and Group 1 interrupts and reinitializes the system registers upon wakeup. If the transition into low power mode fails, then the CPU_PM framework invokes the PM notifier callback with CPU_PM_ENTER_FAILED to allow the drivers to undo the state changes. The GIC V3 driver ignores CPU_PM_ENTER_FAILED, which leaves the GIC in disabled state. Handle CPU_PM_ENTER_FAILED in the same way as CPU_PM_EXIT to restore normal operation. [ tglx: Massage change log, add Fixes tag ] Fixes: 3708d52fc6bb ("irqchip: gic-v3: Implement CPU PM notifier") Signed-off-by: Yogesh Lal Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20241220093907.2747601-1-quic_ylal@quicinc.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 9e05b60795a427d2843f369830929d960ab6a56c) --- drivers/irqchip/irq-gic-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index e7f000f90bb46..6c7943c516eb0 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1460,7 +1460,7 @@ static int gic_retrigger(struct irq_data *data) static int gic_cpu_pm_notifier(struct notifier_block *self, unsigned long cmd, void *v) { - if (cmd == CPU_PM_EXIT) { + if (cmd == CPU_PM_EXIT || cmd == CPU_PM_ENTER_FAILED) { if (gic_dist_security_disabled()) gic_enable_redist(true); gic_cpu_sys_reg_init(); From 76ea8880a2d1f09aeb0aa1dc363162ab9da6f6b4 Mon Sep 17 00:00:00 2001 From: Tomas Krcka Date: Mon, 30 Dec 2024 15:08:25 +0000 Subject: [PATCH 69/85] irqchip/gic-v3-its: Don't enable interrupts in its_irq_set_vcpu_affinity() commit 35cb2c6ce7da545f3b5cb1e6473ad7c3a6f08310 upstream. The following call-chain leads to enabling interrupts in a nested interrupt disabled section: irq_set_vcpu_affinity() irq_get_desc_lock() raw_spin_lock_irqsave() <--- Disable interrupts its_irq_set_vcpu_affinity() guard(raw_spinlock_irq) <--- Enables interrupts when leaving the guard() irq_put_desc_unlock() <--- Warns because interrupts are enabled This was broken in commit b97e8a2f7130, which replaced the original raw_spin_[un]lock() pair with guard(raw_spinlock_irq). Fix the issue by using guard(raw_spinlock). [ tglx: Massaged change log ] Fixes: b97e8a2f7130 ("irqchip/gic-v3-its: Fix potential race condition in its_vlpi_prop_update()") Signed-off-by: Tomas Krcka Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20241230150825.62894-1-krckatom@amazon.de Signed-off-by: Greg Kroah-Hartman (cherry picked from commit bb527bf15dd044521b25704fe0e3c57a85ddfbf6) --- drivers/irqchip/irq-gic-v3-its.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 9ea0f1394d97f..d95de5f8f59d1 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1970,7 +1970,7 @@ static int its_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) if (!is_v4(its_dev->its)) return -EINVAL; - guard(raw_spinlock_irq)(&its_dev->event_map.vlpi_lock); + guard(raw_spinlock)(&its_dev->event_map.vlpi_lock); /* Unmap request? */ if (!info) From 885f908ddfd36635a3418802fb74eb007b8bec8d Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Fri, 20 Dec 2024 22:44:21 +0900 Subject: [PATCH 70/85] hrtimers: Handle CPU state correctly on hotplug commit 2f8dea1692eef2b7ba6a256246ed82c365fdc686 upstream. Consider a scenario where a CPU transitions from CPUHP_ONLINE to halfway through a CPU hotunplug down to CPUHP_HRTIMERS_PREPARE, and then back to CPUHP_ONLINE: Since hrtimers_prepare_cpu() does not run, cpu_base.hres_active remains set to 1 throughout. However, during a CPU unplug operation, the tick and the clockevents are shut down at CPUHP_AP_TICK_DYING. On return to the online state, for instance CFS incorrectly assumes that the hrtick is already active, and the chance of the clockevent device to transition to oneshot mode is also lost forever for the CPU, unless it goes back to a lower state than CPUHP_HRTIMERS_PREPARE once. This round-trip reveals another issue; cpu_base.online is not set to 1 after the transition, which appears as a WARN_ON_ONCE in enqueue_hrtimer(). Aside of that, the bulk of the per CPU state is not reset either, which means there are dangling pointers in the worst case. Address this by adding a corresponding startup() callback, which resets the stale per CPU state and sets the online flag. [ tglx: Make the new callback unconditionally available, remove the online modification in the prepare() callback and clear the remaining state in the starting callback instead of the prepare callback ] Fixes: 5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU earlier") Signed-off-by: Koichiro Den Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20241220134421.3809834-1-koichiro.den@canonical.com Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 4c7869d2d12c30842df36eb66f5a7eaa3817c009) --- include/linux/hrtimer.h | 1 + kernel/cpu.c | 2 +- kernel/time/hrtimer.c | 11 ++++++++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 254d4a898179c..8f77bb0f4ae0c 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -532,6 +532,7 @@ extern void __init hrtimers_init(void); extern void sysrq_timer_list_show(void); int hrtimers_prepare_cpu(unsigned int cpu); +int hrtimers_cpu_starting(unsigned int cpu); #ifdef CONFIG_HOTPLUG_CPU int hrtimers_cpu_dying(unsigned int cpu); #else diff --git a/kernel/cpu.c b/kernel/cpu.c index 0c72b94ed076a..7ab11b4597684 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2206,7 +2206,7 @@ static struct cpuhp_step cpuhp_hp_states[] = { }, [CPUHP_AP_HRTIMERS_DYING] = { .name = "hrtimers:dying", - .startup.single = NULL, + .startup.single = hrtimers_cpu_starting, .teardown.single = hrtimers_cpu_dying, }, diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 57e5cb36f1bc9..e99b1305e1a5f 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -2180,6 +2180,15 @@ int hrtimers_prepare_cpu(unsigned int cpu) } cpu_base->cpu = cpu; + hrtimer_cpu_base_init_expiry_lock(cpu_base); + return 0; +} + +int hrtimers_cpu_starting(unsigned int cpu) +{ + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); + + /* Clear out any left over state from a CPU down operation */ cpu_base->active_bases = 0; cpu_base->hres_active = 0; cpu_base->hang_detected = 0; @@ -2188,7 +2197,6 @@ int hrtimers_prepare_cpu(unsigned int cpu) cpu_base->expires_next = KTIME_MAX; cpu_base->softirq_expires_next = KTIME_MAX; cpu_base->online = 1; - hrtimer_cpu_base_init_expiry_lock(cpu_base); return 0; } @@ -2266,6 +2274,7 @@ int hrtimers_cpu_dying(unsigned int dying_cpu) void __init hrtimers_init(void) { hrtimers_prepare_cpu(smp_processor_id()); + hrtimers_cpu_starting(smp_processor_id()); open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq); } From e0c994692bb7fa192eeeb52a84489274b73ada1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 29 Nov 2024 08:50:11 +0200 Subject: [PATCH 71/85] drm/i915/fb: Relax clear color alignment to 64 bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1a5401ec3018c101c456cdbda2eaef9482db6786 upstream. Mesa changed its clear color alignment from 4k to 64 bytes without informing the kernel side about the change. This is now likely to cause framebuffer creation to fail. The only thing we do with the clear color buffer in i915 is: 1. map a single page 2. read out bytes 16-23 from said page 3. unmap the page So the only requirement we really have is that those 8 bytes are all contained within one page. Thus we can deal with the Mesa regression by reducing the alignment requiment from 4k to the same 64 bytes in the kernel. We could even go as low as 32 bytes, but IIRC 64 bytes is the hardware requirement on the 3D engine side so matching that seems sensible. Note that the Mesa alignment chages were partially undone so the regression itself was already fixed on userspace side. Cc: stable@vger.kernel.org Cc: Sagar Ghuge Cc: Nanley Chery Reported-by: Xi Ruoyao Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13057 Closes: https://lore.kernel.org/all/45a5bba8de009347262d86a4acb27169d9ae0d9f.camel@xry111.site/ Link: https://gitlab.freedesktop.org/mesa/mesa/-/commit/17f97a69c13832a6c1b0b3aad45b06f07d4b852f Link: https://gitlab.freedesktop.org/mesa/mesa/-/commit/888f63cf1baf34bc95e847a30a041dc7798edddb Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20241129065014.8363-2-ville.syrjala@linux.intel.com Tested-by: Xi Ruoyao Reviewed-by: José Roberto de Souza (cherry picked from commit ed3a892e5e3d6b3f6eeb76db7c92a968aeb52f3d) Signed-off-by: Tvrtko Ursulin Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c85105e981c8c2c9bd8115418637826298fb7a96) --- drivers/gpu/drm/i915/display/intel_fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index ca98d3d34fb9a..5095b14ff1401 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -1626,7 +1626,7 @@ int intel_fill_fb_info(struct drm_i915_private *i915, struct intel_framebuffer * * arithmetic related to alignment and offset calculation. */ if (is_gen12_ccs_cc_plane(&fb->base, i)) { - if (IS_ALIGNED(fb->base.offsets[i], PAGE_SIZE)) + if (IS_ALIGNED(fb->base.offsets[i], 64)) continue; else return -EINVAL; From 58db6d26e40684e5c31a9575cea93583c6ab5405 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 20 Dec 2024 16:21:11 +0100 Subject: [PATCH 72/85] drm/amdgpu: always sync the GFX pipe on ctx switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit af04b320c71c4b59971f021615876808a36e5038 upstream. That is needed to enforce isolation between contexts. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit def59436fb0d3ca0f211d14873d0273d69ebb405) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman (cherry picked from commit cb7cca2d5d24f5c24bffc048b89ab05d7131866d) --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 6aa3b1d845abe..806ec5d021995 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -193,8 +193,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, need_ctx_switch = ring->current_ctx != fence_ctx; if (ring->funcs->emit_pipeline_sync && job && ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) || - (amdgpu_sriov_vf(adev) && need_ctx_switch) || - amdgpu_vm_need_pipeline_sync(ring, job))) { + need_ctx_switch || amdgpu_vm_need_pipeline_sync(ring, job))) { + need_pipe_sync = true; if (tmp) From 5fc1a004403d228b60b43d9ccf97a80f9028b221 Mon Sep 17 00:00:00 2001 From: Terry Tritton Date: Fri, 17 Jan 2025 15:16:51 +0000 Subject: [PATCH 73/85] Revert "PCI: Use preserve_config in place of pci_flags" This reverts commit 3e221877dd92dfeccc840700868e7fef2675181b which is commit 7246a4520b4bf1494d7d030166a11b5226f6d508 upstream. This patch causes a regression in cuttlefish/crossvm boot on arm64. The patch was part of a series that when applied will not cause a regression but this patch was backported to the 6.6 branch by itself. The other patches do not apply cleanly to the 6.6 branch. Signed-off-by: Terry Tritton Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 642c70ce073e1ffa24fa6244086e295c779980df) --- drivers/pci/controller/pci-host-common.c | 4 ++++ drivers/pci/probe.c | 20 +++++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/pci/controller/pci-host-common.c b/drivers/pci/controller/pci-host-common.c index e2602e38ae452..6be3266cd7b5b 100644 --- a/drivers/pci/controller/pci-host-common.c +++ b/drivers/pci/controller/pci-host-common.c @@ -73,6 +73,10 @@ int pci_host_common_probe(struct platform_device *pdev) if (IS_ERR(cfg)) return PTR_ERR(cfg); + /* Do not reassign resources if probe only */ + if (!pci_has_flag(PCI_PROBE_ONLY)) + pci_add_flags(PCI_REASSIGN_ALL_BUS); + bridge->sysdata = cfg; bridge->ops = (struct pci_ops *)&ops->pci_ops; bridge->msi_domain = true; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 7e84e472b3383..03b519a228403 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -3096,18 +3096,20 @@ int pci_host_probe(struct pci_host_bridge *bridge) bus = bridge->bus; - /* If we must preserve the resource configuration, claim now */ - if (bridge->preserve_config) - pci_bus_claim_resources(bus); - /* - * Assign whatever was left unassigned. If we didn't claim above, - * this will reassign everything. + * We insert PCI resources into the iomem_resource and + * ioport_resource trees in either pci_bus_claim_resources() + * or pci_bus_assign_resources(). */ - pci_assign_unassigned_root_bus_resources(bus); + if (pci_has_flag(PCI_PROBE_ONLY)) { + pci_bus_claim_resources(bus); + } else { + pci_bus_size_bridges(bus); + pci_bus_assign_resources(bus); - list_for_each_entry(child, &bus->children, node) - pcie_bus_configure_settings(child); + list_for_each_entry(child, &bus->children, node) + pcie_bus_configure_settings(child); + } pci_bus_add_devices(bus); return 0; From c42b9a6770c3d24d729f87427b9556a9ca66e165 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Maneyrol Date: Tue, 12 Nov 2024 10:30:10 +0100 Subject: [PATCH 74/85] iio: imu: inv_icm42600: fix spi burst write not supported commit c0f866de4ce447bca3191b9cefac60c4b36a7922 upstream. Burst write with SPI is not working for all icm42600 chips. It was only used for setting user offsets with regmap_bulk_write. Add specific SPI regmap config for using only single write with SPI. Fixes: 9f9ff91b775b ("iio: imu: inv_icm42600: add SPI driver for inv_icm42600 driver") Cc: stable@vger.kernel.org Signed-off-by: Jean-Baptiste Maneyrol Link: https://patch.msgid.link/20241112-inv-icm42600-fix-spi-burst-write-not-supported-v2-1-97690dc03607@tdk.com Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 0b8321d20a07aca249641502e5c61ab5849c6296) --- drivers/iio/imu/inv_icm42600/inv_icm42600.h | 1 + drivers/iio/imu/inv_icm42600/inv_icm42600_core.c | 11 +++++++++++ drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c | 3 ++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600.h b/drivers/iio/imu/inv_icm42600/inv_icm42600.h index 0e290c807b0f9..94c0eb0bf8748 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600.h +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600.h @@ -362,6 +362,7 @@ struct inv_icm42600_state { typedef int (*inv_icm42600_bus_setup)(struct inv_icm42600_state *); extern const struct regmap_config inv_icm42600_regmap_config; +extern const struct regmap_config inv_icm42600_spi_regmap_config; extern const struct dev_pm_ops inv_icm42600_pm_ops; const struct iio_mount_matrix * diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c index d938bc4543972..da65aa4e27242 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c @@ -44,6 +44,17 @@ const struct regmap_config inv_icm42600_regmap_config = { }; EXPORT_SYMBOL_NS_GPL(inv_icm42600_regmap_config, IIO_ICM42600); +/* define specific regmap for SPI not supporting burst write */ +const struct regmap_config inv_icm42600_spi_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .max_register = 0x4FFF, + .ranges = inv_icm42600_regmap_ranges, + .num_ranges = ARRAY_SIZE(inv_icm42600_regmap_ranges), + .use_single_write = true, +}; +EXPORT_SYMBOL_NS_GPL(inv_icm42600_spi_regmap_config, IIO_ICM42600); + struct inv_icm42600_hw { uint8_t whoami; const char *name; diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c index 6be4ac7949379..abfa1b73cf4d3 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c @@ -59,7 +59,8 @@ static int inv_icm42600_probe(struct spi_device *spi) return -EINVAL; chip = (uintptr_t)match; - regmap = devm_regmap_init_spi(spi, &inv_icm42600_regmap_config); + /* use SPI specific regmap */ + regmap = devm_regmap_init_spi(spi, &inv_icm42600_spi_regmap_config); if (IS_ERR(regmap)) return PTR_ERR(regmap); From 3fc342a27b2a1b569ba812a3fc9879045507251a Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 25 Sep 2024 20:04:15 +0530 Subject: [PATCH 75/85] drm/amd/display: Fix out-of-bounds access in 'dcn21_link_encoder_create' commit 63de35a8fcfca59ae8750d469a7eb220c7557baf upstream. An issue was identified in the dcn21_link_encoder_create function where an out-of-bounds access could occur when the hpd_source index was used to reference the link_enc_hpd_regs array. This array has a fixed size and the index was not being checked against the array's bounds before accessing it. This fix adds a conditional check to ensure that the hpd_source index is within the valid range of the link_enc_hpd_regs array. If the index is out of bounds, the function now returns NULL to prevent undefined behavior. References: [ 65.920507] ------------[ cut here ]------------ [ 65.920510] UBSAN: array-index-out-of-bounds in drivers/gpu/drm/amd/amdgpu/../display/dc/resource/dcn21/dcn21_resource.c:1312:29 [ 65.920519] index 7 is out of range for type 'dcn10_link_enc_hpd_registers [5]' [ 65.920523] CPU: 3 PID: 1178 Comm: modprobe Tainted: G OE 6.8.0-cleanershaderfeatureresetasdntipmi200nv2132 #13 [ 65.920525] Hardware name: AMD Majolica-RN/Majolica-RN, BIOS WMJ0429N_Weekly_20_04_2 04/29/2020 [ 65.920527] Call Trace: [ 65.920529] [ 65.920532] dump_stack_lvl+0x48/0x70 [ 65.920541] dump_stack+0x10/0x20 [ 65.920543] __ubsan_handle_out_of_bounds+0xa2/0xe0 [ 65.920549] dcn21_link_encoder_create+0xd9/0x140 [amdgpu] [ 65.921009] link_create+0x6d3/0xed0 [amdgpu] [ 65.921355] create_links+0x18a/0x4e0 [amdgpu] [ 65.921679] dc_create+0x360/0x720 [amdgpu] [ 65.921999] ? dmi_matches+0xa0/0x220 [ 65.922004] amdgpu_dm_init+0x2b6/0x2c90 [amdgpu] [ 65.922342] ? console_unlock+0x77/0x120 [ 65.922348] ? dev_printk_emit+0x86/0xb0 [ 65.922354] dm_hw_init+0x15/0x40 [amdgpu] [ 65.922686] amdgpu_device_init+0x26a8/0x33a0 [amdgpu] [ 65.922921] amdgpu_driver_load_kms+0x1b/0xa0 [amdgpu] [ 65.923087] amdgpu_pci_probe+0x1b7/0x630 [amdgpu] [ 65.923087] local_pci_probe+0x4b/0xb0 [ 65.923087] pci_device_probe+0xc8/0x280 [ 65.923087] really_probe+0x187/0x300 [ 65.923087] __driver_probe_device+0x85/0x130 [ 65.923087] driver_probe_device+0x24/0x110 [ 65.923087] __driver_attach+0xac/0x1d0 [ 65.923087] ? __pfx___driver_attach+0x10/0x10 [ 65.923087] bus_for_each_dev+0x7d/0xd0 [ 65.923087] driver_attach+0x1e/0x30 [ 65.923087] bus_add_driver+0xf2/0x200 [ 65.923087] driver_register+0x64/0x130 [ 65.923087] ? __pfx_amdgpu_init+0x10/0x10 [amdgpu] [ 65.923087] __pci_register_driver+0x61/0x70 [ 65.923087] amdgpu_init+0x7d/0xff0 [amdgpu] [ 65.923087] do_one_initcall+0x49/0x310 [ 65.923087] ? kmalloc_trace+0x136/0x360 [ 65.923087] do_init_module+0x6a/0x270 [ 65.923087] load_module+0x1fce/0x23a0 [ 65.923087] init_module_from_file+0x9c/0xe0 [ 65.923087] ? init_module_from_file+0x9c/0xe0 [ 65.923087] idempotent_init_module+0x179/0x230 [ 65.923087] __x64_sys_finit_module+0x5d/0xa0 [ 65.923087] do_syscall_64+0x76/0x120 [ 65.923087] entry_SYSCALL_64_after_hwframe+0x6e/0x76 [ 65.923087] RIP: 0033:0x7f2d80f1e88d [ 65.923087] Code: 5b 41 5c c3 66 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 b5 0f 00 f7 d8 64 89 01 48 [ 65.923087] RSP: 002b:00007ffc7bc1aa78 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 65.923087] RAX: ffffffffffffffda RBX: 0000564c9c1db130 RCX: 00007f2d80f1e88d [ 65.923087] RDX: 0000000000000000 RSI: 0000564c9c1e5480 RDI: 000000000000000f [ 65.923087] RBP: 0000000000040000 R08: 0000000000000000 R09: 0000000000000002 [ 65.923087] R10: 000000000000000f R11: 0000000000000246 R12: 0000564c9c1e5480 [ 65.923087] R13: 0000564c9c1db260 R14: 0000000000000000 R15: 0000564c9c1e54b0 [ 65.923087] [ 65.923927] ---[ end trace ]--- Cc: Tom Chung Cc: Rodrigo Siqueira Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Cc: Harry Wentland Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Roman Li Signed-off-by: Alex Deucher Signed-off-by: Bin Lan Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 6e9d1a646cebf2d7fb9e66ff2cd004f185ed2719) --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index d1a25fe6c44fa..8dffa5b6426e1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -1315,7 +1315,7 @@ static struct link_encoder *dcn21_link_encoder_create( kzalloc(sizeof(struct dcn21_link_encoder), GFP_KERNEL); int link_regs_id; - if (!enc21) + if (!enc21 || enc_init_data->hpd_source >= ARRAY_SIZE(link_enc_hpd_regs)) return NULL; link_regs_id = From 1d5241295d45d8e2e4317ad1e65ddd108bd955c6 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 4 Nov 2024 19:00:05 +0800 Subject: [PATCH 76/85] block: fix uaf for flush rq while iterating tags commit 3802f73bd80766d70f319658f334754164075bc3 upstream. blk_mq_clear_flush_rq_mapping() is not called during scsi probe, by checking blk_queue_init_done(). However, QUEUE_FLAG_INIT_DONE is cleared in del_gendisk by commit aec89dc5d421 ("block: keep q_usage_counter in atomic mode after del_gendisk"), hence for disk like scsi, following blk_mq_destroy_queue() will not clear flush rq from tags->rqs[] as well, cause following uaf that is found by our syzkaller for v6.6: ================================================================== BUG: KASAN: slab-use-after-free in blk_mq_find_and_get_req+0x16e/0x1a0 block/blk-mq-tag.c:261 Read of size 4 at addr ffff88811c969c20 by task kworker/1:2H/224909 CPU: 1 PID: 224909 Comm: kworker/1:2H Not tainted 6.6.0-ga836a5060850 #32 Workqueue: kblockd blk_mq_timeout_work Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x91/0xf0 lib/dump_stack.c:106 print_address_description.constprop.0+0x66/0x300 mm/kasan/report.c:364 print_report+0x3e/0x70 mm/kasan/report.c:475 kasan_report+0xb8/0xf0 mm/kasan/report.c:588 blk_mq_find_and_get_req+0x16e/0x1a0 block/blk-mq-tag.c:261 bt_iter block/blk-mq-tag.c:288 [inline] __sbitmap_for_each_set include/linux/sbitmap.h:295 [inline] sbitmap_for_each_set include/linux/sbitmap.h:316 [inline] bt_for_each+0x455/0x790 block/blk-mq-tag.c:325 blk_mq_queue_tag_busy_iter+0x320/0x740 block/blk-mq-tag.c:534 blk_mq_timeout_work+0x1a3/0x7b0 block/blk-mq.c:1673 process_one_work+0x7c4/0x1450 kernel/workqueue.c:2631 process_scheduled_works kernel/workqueue.c:2704 [inline] worker_thread+0x804/0xe40 kernel/workqueue.c:2785 kthread+0x346/0x450 kernel/kthread.c:388 ret_from_fork+0x4d/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:293 Allocated by task 942: kasan_save_stack+0x22/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 ____kasan_kmalloc mm/kasan/common.c:374 [inline] __kasan_kmalloc mm/kasan/common.c:383 [inline] __kasan_kmalloc+0xaa/0xb0 mm/kasan/common.c:380 kasan_kmalloc include/linux/kasan.h:198 [inline] __do_kmalloc_node mm/slab_common.c:1007 [inline] __kmalloc_node+0x69/0x170 mm/slab_common.c:1014 kmalloc_node include/linux/slab.h:620 [inline] kzalloc_node include/linux/slab.h:732 [inline] blk_alloc_flush_queue+0x144/0x2f0 block/blk-flush.c:499 blk_mq_alloc_hctx+0x601/0x940 block/blk-mq.c:3788 blk_mq_alloc_and_init_hctx+0x27f/0x330 block/blk-mq.c:4261 blk_mq_realloc_hw_ctxs+0x488/0x5e0 block/blk-mq.c:4294 blk_mq_init_allocated_queue+0x188/0x860 block/blk-mq.c:4350 blk_mq_init_queue_data block/blk-mq.c:4166 [inline] blk_mq_init_queue+0x8d/0x100 block/blk-mq.c:4176 scsi_alloc_sdev+0x843/0xd50 drivers/scsi/scsi_scan.c:335 scsi_probe_and_add_lun+0x77c/0xde0 drivers/scsi/scsi_scan.c:1189 __scsi_scan_target+0x1fc/0x5a0 drivers/scsi/scsi_scan.c:1727 scsi_scan_channel drivers/scsi/scsi_scan.c:1815 [inline] scsi_scan_channel+0x14b/0x1e0 drivers/scsi/scsi_scan.c:1791 scsi_scan_host_selected+0x2fe/0x400 drivers/scsi/scsi_scan.c:1844 scsi_scan+0x3a0/0x3f0 drivers/scsi/scsi_sysfs.c:151 store_scan+0x2a/0x60 drivers/scsi/scsi_sysfs.c:191 dev_attr_store+0x5c/0x90 drivers/base/core.c:2388 sysfs_kf_write+0x11c/0x170 fs/sysfs/file.c:136 kernfs_fop_write_iter+0x3fc/0x610 fs/kernfs/file.c:338 call_write_iter include/linux/fs.h:2083 [inline] new_sync_write+0x1b4/0x2d0 fs/read_write.c:493 vfs_write+0x76c/0xb00 fs/read_write.c:586 ksys_write+0x127/0x250 fs/read_write.c:639 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x70/0x120 arch/x86/entry/common.c:81 entry_SYSCALL_64_after_hwframe+0x78/0xe2 Freed by task 244687: kasan_save_stack+0x22/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 kasan_save_free_info+0x2b/0x50 mm/kasan/generic.c:522 ____kasan_slab_free mm/kasan/common.c:236 [inline] __kasan_slab_free+0x12a/0x1b0 mm/kasan/common.c:244 kasan_slab_free include/linux/kasan.h:164 [inline] slab_free_hook mm/slub.c:1815 [inline] slab_free_freelist_hook mm/slub.c:1841 [inline] slab_free mm/slub.c:3807 [inline] __kmem_cache_free+0xe4/0x520 mm/slub.c:3820 blk_free_flush_queue+0x40/0x60 block/blk-flush.c:520 blk_mq_hw_sysfs_release+0x4a/0x170 block/blk-mq-sysfs.c:37 kobject_cleanup+0x136/0x410 lib/kobject.c:689 kobject_release lib/kobject.c:720 [inline] kref_put include/linux/kref.h:65 [inline] kobject_put+0x119/0x140 lib/kobject.c:737 blk_mq_release+0x24f/0x3f0 block/blk-mq.c:4144 blk_free_queue block/blk-core.c:298 [inline] blk_put_queue+0xe2/0x180 block/blk-core.c:314 blkg_free_workfn+0x376/0x6e0 block/blk-cgroup.c:144 process_one_work+0x7c4/0x1450 kernel/workqueue.c:2631 process_scheduled_works kernel/workqueue.c:2704 [inline] worker_thread+0x804/0xe40 kernel/workqueue.c:2785 kthread+0x346/0x450 kernel/kthread.c:388 ret_from_fork+0x4d/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:293 Other than blk_mq_clear_flush_rq_mapping(), the flag is only used in blk_register_queue() from initialization path, hence it's safe not to clear the flag in del_gendisk. And since QUEUE_FLAG_REGISTERED already make sure that queue should only be registered once, there is no need to test the flag as well. Fixes: 6cfeadbff3f8 ("blk-mq: don't clear flush_rq from tags->rqs[]") Depends-on: commit aec89dc5d421 ("block: keep q_usage_counter in atomic mode after del_gendisk") Signed-off-by: Yu Kuai Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20241104110005.1412161-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: BRUNO VERNAY Signed-off-by: Hugo SIMELIERE Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 98f4867bea6708434e7018557a15b565329a517a) --- block/blk-sysfs.c | 6 ++---- block/genhd.c | 9 +++------ 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 63e4812623361..4990a19e60133 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -842,10 +842,8 @@ int blk_register_queue(struct gendisk *disk) * faster to shut down and is made fully functional here as * request_queues for non-existent devices never get registered. */ - if (!blk_queue_init_done(q)) { - blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q); - percpu_ref_switch_to_percpu(&q->q_usage_counter); - } + blk_queue_flag_set(QUEUE_FLAG_INIT_DONE, q); + percpu_ref_switch_to_percpu(&q->q_usage_counter); return ret; diff --git a/block/genhd.c b/block/genhd.c index 203c880c3e1cd..6d704c37f26e7 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -710,13 +710,10 @@ void del_gendisk(struct gendisk *disk) * If the disk does not own the queue, allow using passthrough requests * again. Else leave the queue frozen to fail all I/O. */ - if (!test_bit(GD_OWNS_QUEUE, &disk->state)) { - blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q); + if (!test_bit(GD_OWNS_QUEUE, &disk->state)) __blk_mq_unfreeze_queue(q, true); - } else { - if (queue_is_mq(q)) - blk_mq_exit_queue(q); - } + else if (queue_is_mq(q)) + blk_mq_exit_queue(q); } EXPORT_SYMBOL(del_gendisk); From b33ec4970689238eccacbf067d2b6a5cb634b6b7 Mon Sep 17 00:00:00 2001 From: Mohammed Anees Date: Tue, 24 Sep 2024 09:32:57 +0000 Subject: [PATCH 77/85] ocfs2: fix deadlock in ocfs2_get_system_file_inode commit 7bf1823e010e8db2fb649c790bd1b449a75f52d8 upstream. syzbot has found a possible deadlock in ocfs2_get_system_file_inode [1]. The scenario is depicted here, CPU0 CPU1 lock(&ocfs2_file_ip_alloc_sem_key); lock(&osb->system_file_mutex); lock(&ocfs2_file_ip_alloc_sem_key); lock(&osb->system_file_mutex); The function calls which could lead to this are: CPU0 ocfs2_mknod - lock(&ocfs2_file_ip_alloc_sem_key); . . . ocfs2_get_system_file_inode - lock(&osb->system_file_mutex); CPU1 - ocfs2_fill_super - lock(&osb->system_file_mutex); . . . ocfs2_read_virt_blocks - lock(&ocfs2_file_ip_alloc_sem_key); This issue can be resolved by making the down_read -> down_read_try in the ocfs2_read_virt_blocks. [1] https://syzkaller.appspot.com/bug?extid=e0055ea09f1f5e6fabdd Link: https://lkml.kernel.org/r/20240924093257.7181-1-pvmohammedanees2003@gmail.com Signed-off-by: Mohammed Anees Reviewed-by: Joseph Qi Reported-by: Closes: https://syzkaller.appspot.com/bug?extid=e0055ea09f1f5e6fabdd Tested-by: syzbot+e0055ea09f1f5e6fabdd@syzkaller.appspotmail.com Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Cc: Signed-off-by: Andrew Morton Cc: Xingyu Li Cc: Zheng Zhang Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 75d8d969648c3fb51da95db2a99dc7596a20ae50) --- fs/ocfs2/extent_map.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 70a768b623cf4..f7672472fa827 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -973,7 +973,13 @@ int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr, } while (done < nr) { - down_read(&OCFS2_I(inode)->ip_alloc_sem); + if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) { + rc = -EAGAIN; + mlog(ML_ERROR, + "Inode #%llu ip_alloc_sem is temporarily unavailable\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno); + break; + } rc = ocfs2_extent_map_get_blocks(inode, v_block + done, &p_block, &p_count, NULL); up_read(&OCFS2_I(inode)->ip_alloc_sem); From dbcb07151c2e56a9724825c21fc52a7dd68e10eb Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 5 Jan 2025 17:24:03 +0100 Subject: [PATCH 78/85] ovl: pass realinode to ovl_encode_real_fh() instead of realdentry commit 07aeefae7ff44d80524375253980b1bdee2396b0 upstream. We want to be able to encode an fid from an inode with no alias. Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20250105162404.357058-2-amir73il@gmail.com Signed-off-by: Christian Brauner Stable-dep-of: c45beebfde34 ("ovl: support encoding fid from inode with no alias") Signed-off-by: Sasha Levin [re-applied over v6.6.71 with conflict resolved] Signed-off-by: Amir Goldstein Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 80b78b326819e691f14ef7de63b7a4aa7190f163) --- fs/overlayfs/copy_up.c | 11 ++++++----- fs/overlayfs/export.c | 5 +++-- fs/overlayfs/namei.c | 4 ++-- fs/overlayfs/overlayfs.h | 2 +- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index ada3fcc9c6d50..e97bcf15c689c 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -371,13 +371,13 @@ int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upperdentry, return err; } -struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, +struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct inode *realinode, bool is_upper) { struct ovl_fh *fh; int fh_type, dwords; int buflen = MAX_HANDLE_SZ; - uuid_t *uuid = &real->d_sb->s_uuid; + uuid_t *uuid = &realinode->i_sb->s_uuid; int err; /* Make sure the real fid stays 32bit aligned */ @@ -394,7 +394,8 @@ struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, * the price or reconnecting the dentry. */ dwords = buflen >> 2; - fh_type = exportfs_encode_fh(real, (void *)fh->fb.fid, &dwords, 0); + fh_type = exportfs_encode_inode_fh(realinode, (void *)fh->fb.fid, + &dwords, NULL, 0); buflen = (dwords << 2); err = -EIO; @@ -438,7 +439,7 @@ int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower, * up and a pure upper inode. */ if (ovl_can_decode_fh(lower->d_sb)) { - fh = ovl_encode_real_fh(ofs, lower, false); + fh = ovl_encode_real_fh(ofs, d_inode(lower), false); if (IS_ERR(fh)) return PTR_ERR(fh); } @@ -461,7 +462,7 @@ static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper, const struct ovl_fh *fh; int err; - fh = ovl_encode_real_fh(ofs, upper, true); + fh = ovl_encode_real_fh(ofs, d_inode(upper), true); if (IS_ERR(fh)) return PTR_ERR(fh); diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 611ff567a1aa6..c56e4e0b8054c 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -228,6 +228,7 @@ static int ovl_check_encode_origin(struct dentry *dentry) static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry, u32 *fid, int buflen) { + struct inode *inode = d_inode(dentry); struct ovl_fh *fh = NULL; int err, enc_lower; int len; @@ -241,8 +242,8 @@ static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry, goto fail; /* Encode an upper or lower file handle */ - fh = ovl_encode_real_fh(ofs, enc_lower ? ovl_dentry_lower(dentry) : - ovl_dentry_upper(dentry), !enc_lower); + fh = ovl_encode_real_fh(ofs, enc_lower ? ovl_inode_lower(inode) : + ovl_inode_upper(inode), !enc_lower); if (IS_ERR(fh)) return PTR_ERR(fh); diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 80391c687c2ad..273a39d3e9513 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -523,7 +523,7 @@ int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, struct ovl_fh *fh; int err; - fh = ovl_encode_real_fh(ofs, real, is_upper); + fh = ovl_encode_real_fh(ofs, d_inode(real), is_upper); err = PTR_ERR(fh); if (IS_ERR(fh)) { fh = NULL; @@ -720,7 +720,7 @@ int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin, struct ovl_fh *fh; int err; - fh = ovl_encode_real_fh(ofs, origin, false); + fh = ovl_encode_real_fh(ofs, d_inode(origin), false); if (IS_ERR(fh)) return PTR_ERR(fh); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 09ca82ed0f8ce..981967e507b3e 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -821,7 +821,7 @@ int ovl_copy_up_with_data(struct dentry *dentry); int ovl_maybe_copy_up(struct dentry *dentry, int flags); int ovl_copy_xattr(struct super_block *sb, const struct path *path, struct dentry *new); int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upper, struct kstat *stat); -struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, +struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct inode *realinode, bool is_upper); int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower, struct dentry *upper); From 748e7dcdb9b86518c4340f9173016f5b952da9da Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 5 Jan 2025 17:24:04 +0100 Subject: [PATCH 79/85] ovl: support encoding fid from inode with no alias commit c45beebfde34aa71afbc48b2c54cdda623515037 upstream. Dmitry Safonov reported that a WARN_ON() assertion can be trigered by userspace when calling inotify_show_fdinfo() for an overlayfs watched inode, whose dentry aliases were discarded with drop_caches. The WARN_ON() assertion in inotify_show_fdinfo() was removed, because it is possible for encoding file handle to fail for other reason, but the impact of failing to encode an overlayfs file handle goes beyond this assertion. As shown in the LTP test case mentioned in the link below, failure to encode an overlayfs file handle from a non-aliased inode also leads to failure to report an fid with FAN_DELETE_SELF fanotify events. As Dmitry notes in his analyzis of the problem, ovl_encode_fh() fails if it cannot find an alias for the inode, but this failure can be fixed. ovl_encode_fh() seldom uses the alias and in the case of non-decodable file handles, as is often the case with fanotify fid info, ovl_encode_fh() never needs to use the alias to encode a file handle. Defer finding an alias until it is actually needed so ovl_encode_fh() will not fail in the common case of FAN_DELETE_SELF fanotify events. Fixes: 16aac5ad1fa9 ("ovl: support encoding non-decodable file handles") Reported-by: Dmitry Safonov Closes: https://lore.kernel.org/linux-fsdevel/CAOQ4uxiie81voLZZi2zXS1BziXZCM24nXqPAxbu8kxXCUWdwOg@mail.gmail.com/ Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20250105162404.357058-3-amir73il@gmail.com Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 16de9b9f7c81a299813b42d9bb40b0c1d63d1e5f) --- fs/overlayfs/export.c | 46 +++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index c56e4e0b8054c..3a17e4366f28c 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -181,35 +181,37 @@ static int ovl_connect_layer(struct dentry *dentry) * * Return 0 for upper file handle, > 0 for lower file handle or < 0 on error. */ -static int ovl_check_encode_origin(struct dentry *dentry) +static int ovl_check_encode_origin(struct inode *inode) { - struct ovl_fs *ofs = OVL_FS(dentry->d_sb); + struct ovl_fs *ofs = OVL_FS(inode->i_sb); bool decodable = ofs->config.nfs_export; + struct dentry *dentry; + int err; /* No upper layer? */ if (!ovl_upper_mnt(ofs)) return 1; /* Lower file handle for non-upper non-decodable */ - if (!ovl_dentry_upper(dentry) && !decodable) + if (!ovl_inode_upper(inode) && !decodable) return 1; /* Upper file handle for pure upper */ - if (!ovl_dentry_lower(dentry)) + if (!ovl_inode_lower(inode)) return 0; /* * Root is never indexed, so if there's an upper layer, encode upper for * root. */ - if (dentry == dentry->d_sb->s_root) + if (inode == d_inode(inode->i_sb->s_root)) return 0; /* * Upper decodable file handle for non-indexed upper. */ - if (ovl_dentry_upper(dentry) && decodable && - !ovl_test_flag(OVL_INDEX, d_inode(dentry))) + if (ovl_inode_upper(inode) && decodable && + !ovl_test_flag(OVL_INDEX, inode)) return 0; /* @@ -218,17 +220,25 @@ static int ovl_check_encode_origin(struct dentry *dentry) * ovl_connect_layer() will try to make origin's layer "connected" by * copying up a "connectable" ancestor. */ - if (d_is_dir(dentry) && decodable) - return ovl_connect_layer(dentry); + if (!decodable || !S_ISDIR(inode->i_mode)) + return 1; + + dentry = d_find_any_alias(inode); + if (!dentry) + return -ENOENT; + + err = ovl_connect_layer(dentry); + dput(dentry); + if (err < 0) + return err; /* Lower file handle for indexed and non-upper dir/non-dir */ return 1; } -static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry, +static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct inode *inode, u32 *fid, int buflen) { - struct inode *inode = d_inode(dentry); struct ovl_fh *fh = NULL; int err, enc_lower; int len; @@ -237,7 +247,7 @@ static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry, * Check if we should encode a lower or upper file handle and maybe * copy up an ancestor to make lower file handle connectable. */ - err = enc_lower = ovl_check_encode_origin(dentry); + err = enc_lower = ovl_check_encode_origin(inode); if (enc_lower < 0) goto fail; @@ -257,8 +267,8 @@ static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry, return err; fail: - pr_warn_ratelimited("failed to encode file handle (%pd2, err=%i)\n", - dentry, err); + pr_warn_ratelimited("failed to encode file handle (ino=%lu, err=%i)\n", + inode->i_ino, err); goto out; } @@ -266,19 +276,13 @@ static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len, struct inode *parent) { struct ovl_fs *ofs = OVL_FS(inode->i_sb); - struct dentry *dentry; int bytes, buflen = *max_len << 2; /* TODO: encode connectable file handles */ if (parent) return FILEID_INVALID; - dentry = d_find_any_alias(inode); - if (!dentry) - return FILEID_INVALID; - - bytes = ovl_dentry_to_fid(ofs, dentry, fid, buflen); - dput(dentry); + bytes = ovl_dentry_to_fid(ofs, inode, fid, buflen); if (bytes <= 0) return FILEID_INVALID; From 575ef961c6d9859bf8c75b252077307bcecdebcc Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Dec 2024 12:53:01 +0100 Subject: [PATCH 80/85] fs: relax assertions on failure to encode file handles commit 974e3fe0ac61de85015bbe5a4990cf4127b304b2 upstream. Encoding file handles is usually performed by a filesystem >encode_fh() method that may fail for various reasons. The legacy users of exportfs_encode_fh(), namely, nfsd and name_to_handle_at(2) syscall are ready to cope with the possibility of failure to encode a file handle. There are a few other users of exportfs_encode_{fh,fid}() that currently have a WARN_ON() assertion when ->encode_fh() fails. Relax those assertions because they are wrong. The second linked bug report states commit 16aac5ad1fa9 ("ovl: support encoding non-decodable file handles") in v6.6 as the regressing commit, but this is not accurate. The aforementioned commit only increases the chances of the assertion and allows triggering the assertion with the reproducer using overlayfs, inotify and drop_caches. Triggering this assertion was always possible with other filesystems and other reasons of ->encode_fh() failures and more particularly, it was also possible with the exact same reproducer using overlayfs that is mounted with options index=on,nfs_export=on also on kernels < v6.6. Therefore, I am not listing the aforementioned commit as a Fixes commit. Backport hint: this patch will have a trivial conflict applying to v6.6.y, and other trivial conflicts applying to stable kernels < v6.6. Reported-by: syzbot+ec07f6f5ce62b858579f@syzkaller.appspotmail.com Tested-by: syzbot+ec07f6f5ce62b858579f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-unionfs/671fd40c.050a0220.4735a.024f.GAE@google.com/ Reported-by: Dmitry Safonov Closes: https://lore.kernel.org/linux-fsdevel/CAGrbwDTLt6drB9eaUagnQVgdPBmhLfqqxAf3F+Juqy_o6oP8uw@mail.gmail.com/ Cc: stable@vger.kernel.org Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20241219115301.465396-1-amir73il@gmail.com Signed-off-by: Christian Brauner Signed-off-by: Amir Goldstein Signed-off-by: Greg Kroah-Hartman (cherry picked from commit e90e570645b28fb910d9fa058a5ea4a208a3c11d) --- fs/notify/fdinfo.c | 4 +--- fs/overlayfs/copy_up.c | 5 ++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 5c430736ec12c..26655572975d3 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -51,10 +51,8 @@ static void show_mark_fhandle(struct seq_file *m, struct inode *inode) size = f.handle.handle_bytes >> 2; ret = exportfs_encode_fid(inode, (struct fid *)f.handle.f_handle, &size); - if ((ret == FILEID_INVALID) || (ret < 0)) { - WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret); + if ((ret == FILEID_INVALID) || (ret < 0)) return; - } f.handle.handle_type = ret; f.handle.handle_bytes = size * sizeof(u32); diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index e97bcf15c689c..18e018cb18117 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -399,9 +399,8 @@ struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct inode *realinode, buflen = (dwords << 2); err = -EIO; - if (WARN_ON(fh_type < 0) || - WARN_ON(buflen > MAX_HANDLE_SZ) || - WARN_ON(fh_type == FILEID_INVALID)) + if (fh_type < 0 || fh_type == FILEID_INVALID || + WARN_ON(buflen > MAX_HANDLE_SZ)) goto out_err; fh->fb.version = OVL_FH_VERSION; From 10e7ef8d900ecdbc3595877898dbffa3d3156c1e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 21 Jan 2025 14:11:06 +0100 Subject: [PATCH 81/85] Revert "drm/amdgpu: rework resume handling for display (v2)" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 2daba7d857e48035d71cdd95964350b6d0d51545 which is commit 73dae652dcac776296890da215ee7dec357a1032 upstream. The original patch 73dae652dcac (drm/amdgpu: rework resume handling for display (v2)), was only targeted at kernels 6.11 and newer. It did not apply cleanly to 6.12 so I backported it and it backport landed as 99a02eab8251 ("drm/amdgpu: rework resume handling for display (v2)"), however there was a bug in the backport that was subsequently fixed in 063d380ca28e ("drm/amdgpu: fix backport of commit 73dae652dcac"). None of this was intended for kernels older than 6.11, however the original backport eventually landed in 6.6, 6.1, and 5.15. Please revert the change from kernels 6.6, 6.1, and 5.15. Link: https://lore.kernel.org/r/BL1PR12MB5144D5363FCE6F2FD3502534F7E72@BL1PR12MB5144.namprd12.prod.outlook.com Link: https://lore.kernel.org/r/BL1PR12MB51449ADCFBF2314431F8BCFDF7132@BL1PR12MB5144.namprd12.prod.outlook.com Reported-by: Salvatore Bonaccorso Reported-by: Christian König Reported-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman (cherry picked from commit abbf6ac524eaa9ffd69d0dacab1036540dfb7abc) --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45 +--------------------- 1 file changed, 2 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f4c1cc6df1c83..2e739b80cfccf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3172,7 +3172,7 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * - * Second resume function for hardware IPs. The list of all the hardware + * First resume function for hardware IPs. The list of all the hardware * IPs that make up the asic is walked and the resume callbacks are run for * all blocks except COMMON, GMC, and IH. resume puts the hardware into a * functional state after a suspend and updates the software state as @@ -3190,7 +3190,6 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) continue; r = adev->ip_blocks[i].version->funcs->resume(adev); @@ -3205,36 +3204,6 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) return 0; } -/** - * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs - * - * @adev: amdgpu_device pointer - * - * Third resume function for hardware IPs. The list of all the hardware - * IPs that make up the asic is walked and the resume callbacks are run for - * all DCE. resume puts the hardware into a functional state after a suspend - * and updates the software state as necessary. This function is also used - * for restoring the GPU after a GPU reset. - * - * Returns 0 on success, negative error code on failure. - */ -static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev) -{ - int i, r; - - for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) - continue; - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) { - r = adev->ip_blocks[i].version->funcs->resume(adev); - if (r) - return r; - } - } - - return 0; -} - /** * amdgpu_device_ip_resume - run resume for hardware IPs * @@ -3261,13 +3230,6 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev) r = amdgpu_device_ip_resume_phase2(adev); - if (r) - return r; - - amdgpu_fence_driver_hw_init(adev); - - r = amdgpu_device_ip_resume_phase3(adev); - return r; } @@ -4267,6 +4229,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r); goto exit; } + amdgpu_fence_driver_hw_init(adev); r = amdgpu_device_ip_late_init(adev); if (r) @@ -5036,10 +4999,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, if (r) goto out; - r = amdgpu_device_ip_resume_phase3(tmp_adev); - if (r) - goto out; - if (vram_lost) amdgpu_device_fill_reset_magic(tmp_adev); From f26d64250587e8be68f40aa0550ec804ca35253d Mon Sep 17 00:00:00 2001 From: Youzhong Yang Date: Wed, 10 Jul 2024 10:40:35 -0400 Subject: [PATCH 82/85] nfsd: add list_head nf_gc to struct nfsd_file commit 8e6e2ffa6569a205f1805cbaeca143b556581da6 upstream. nfsd_file_put() in one thread can race with another thread doing garbage collection (running nfsd_file_gc() -> list_lru_walk() -> nfsd_file_lru_cb()): * In nfsd_file_put(), nf->nf_ref is 1, so it tries to do nfsd_file_lru_add(). * nfsd_file_lru_add() returns true (with NFSD_FILE_REFERENCED bit set) * garbage collector kicks in, nfsd_file_lru_cb() clears REFERENCED bit and returns LRU_ROTATE. * garbage collector kicks in again, nfsd_file_lru_cb() now decrements nf->nf_ref to 0, runs nfsd_file_unhash(), removes it from the LRU and adds to the dispose list [list_lru_isolate_move(lru, &nf->nf_lru, head)] * nfsd_file_put() detects NFSD_FILE_HASHED bit is cleared, so it tries to remove the 'nf' from the LRU [if (!nfsd_file_lru_remove(nf))]. The 'nf' has been added to the 'dispose' list by nfsd_file_lru_cb(), so nfsd_file_lru_remove(nf) simply treats it as part of the LRU and removes it, which leads to its removal from the 'dispose' list. * At this moment, 'nf' is unhashed with its nf_ref being 0, and not on the LRU. nfsd_file_put() continues its execution [if (refcount_dec_and_test(&nf->nf_ref))], as nf->nf_ref is already 0, nf->nf_ref is set to REFCOUNT_SATURATED, and the 'nf' gets no chance of being freed. nfsd_file_put() can also race with nfsd_file_cond_queue(): * In nfsd_file_put(), nf->nf_ref is 1, so it tries to do nfsd_file_lru_add(). * nfsd_file_lru_add() sets REFERENCED bit and returns true. * Some userland application runs 'exportfs -f' or something like that, which triggers __nfsd_file_cache_purge() -> nfsd_file_cond_queue(). * In nfsd_file_cond_queue(), it runs [if (!nfsd_file_unhash(nf))], unhash is done successfully. * nfsd_file_cond_queue() runs [if (!nfsd_file_get(nf))], now nf->nf_ref goes to 2. * nfsd_file_cond_queue() runs [if (nfsd_file_lru_remove(nf))], it succeeds. * nfsd_file_cond_queue() runs [if (refcount_sub_and_test(decrement, &nf->nf_ref))] (with "decrement" being 2), so the nf->nf_ref goes to 0, the 'nf' is added to the dispose list [list_add(&nf->nf_lru, dispose)] * nfsd_file_put() detects NFSD_FILE_HASHED bit is cleared, so it tries to remove the 'nf' from the LRU [if (!nfsd_file_lru_remove(nf))], although the 'nf' is not in the LRU, but it is linked in the 'dispose' list, nfsd_file_lru_remove() simply treats it as part of the LRU and removes it. This leads to its removal from the 'dispose' list! * Now nf->ref is 0, unhashed. nfsd_file_put() continues its execution and set nf->nf_ref to REFCOUNT_SATURATED. As shown in the above analysis, using nf_lru for both the LRU list and dispose list can cause the leaks. This patch adds a new list_head nf_gc in struct nfsd_file, and uses it for the dispose list. This does not fix the nfsd_file leaking issue completely. Signed-off-by: Youzhong Yang Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 6b3ac32b813a0abf5d1fa9a3317742202012ff30) --- fs/nfsd/filecache.c | 18 ++++++++++-------- fs/nfsd/filecache.h | 1 + 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 6f2bcbfde45e6..0ff07d53931f2 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -219,6 +219,7 @@ nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need, return NULL; INIT_LIST_HEAD(&nf->nf_lru); + INIT_LIST_HEAD(&nf->nf_gc); nf->nf_birthtime = ktime_get(); nf->nf_file = NULL; nf->nf_cred = get_current_cred(); @@ -396,8 +397,8 @@ nfsd_file_dispose_list(struct list_head *dispose) struct nfsd_file *nf; while (!list_empty(dispose)) { - nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - list_del_init(&nf->nf_lru); + nf = list_first_entry(dispose, struct nfsd_file, nf_gc); + list_del_init(&nf->nf_gc); nfsd_file_free(nf); } } @@ -414,12 +415,12 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose) { while(!list_empty(dispose)) { struct nfsd_file *nf = list_first_entry(dispose, - struct nfsd_file, nf_lru); + struct nfsd_file, nf_gc); struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id); struct nfsd_fcache_disposal *l = nn->fcache_disposal; spin_lock(&l->lock); - list_move_tail(&nf->nf_lru, &l->freeme); + list_move_tail(&nf->nf_gc, &l->freeme); spin_unlock(&l->lock); queue_work(nfsd_filecache_wq, &l->work); } @@ -476,7 +477,8 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, /* Refcount went to zero. Unhash it and queue it to the dispose list */ nfsd_file_unhash(nf); - list_lru_isolate_move(lru, &nf->nf_lru, head); + list_lru_isolate(lru, &nf->nf_lru); + list_add(&nf->nf_gc, head); this_cpu_inc(nfsd_file_evictions); trace_nfsd_file_gc_disposed(nf); return LRU_REMOVED; @@ -555,7 +557,7 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) /* If refcount goes to 0, then put on the dispose list */ if (refcount_sub_and_test(decrement, &nf->nf_ref)) { - list_add(&nf->nf_lru, dispose); + list_add(&nf->nf_gc, dispose); trace_nfsd_file_closing(nf); } } @@ -631,8 +633,8 @@ nfsd_file_close_inode_sync(struct inode *inode) nfsd_file_queue_for_close(inode, &dispose); while (!list_empty(&dispose)) { - nf = list_first_entry(&dispose, struct nfsd_file, nf_lru); - list_del_init(&nf->nf_lru); + nf = list_first_entry(&dispose, struct nfsd_file, nf_gc); + list_del_init(&nf->nf_gc); nfsd_file_free(nf); } flush_delayed_fput(); diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index e54165a3224f0..bf7a630f1a456 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -44,6 +44,7 @@ struct nfsd_file { struct nfsd_file_mark *nf_mark; struct list_head nf_lru; + struct list_head nf_gc; struct rcu_head nf_rcu; ktime_t nf_birthtime; }; From ac272d5d8a050b432d18b82414989d66eaa8ccf9 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 17 Jan 2025 12:05:51 +0100 Subject: [PATCH 83/85] x86/xen: fix SLS mitigation in xen_hypercall_iret() The backport of upstream patch a2796dff62d6 ("x86/xen: don't do PV iret hypercall through hypercall page") missed to adapt the SLS mitigation config check from CONFIG_MITIGATION_SLS to CONFIG_SLS. Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 85a1b0cad66a88caac12d674998e914438b2d760) --- arch/x86/xen/xen-asm.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 901b605166834..6231f6efb4ee1 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -221,7 +221,7 @@ SYM_CODE_END(xen_early_idt_handler_array) push %rax mov $__HYPERVISOR_iret, %eax syscall /* Do the IRET. */ -#ifdef CONFIG_MITIGATION_SLS +#ifdef CONFIG_SLS int3 #endif .endm From 02046454dc30935518dfc0e2b0cc2213055a298b Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Thu, 7 Nov 2024 10:34:05 +0800 Subject: [PATCH 84/85] net: fix data-races around sk->sk_forward_alloc commit 073d89808c065ac4c672c0a613a71b27a80691cb upstream. Syzkaller reported this warning: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 16 at net/ipv4/af_inet.c:156 inet_sock_destruct+0x1c5/0x1e0 Modules linked in: CPU: 0 UID: 0 PID: 16 Comm: ksoftirqd/0 Not tainted 6.12.0-rc5 #26 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:inet_sock_destruct+0x1c5/0x1e0 Code: 24 12 4c 89 e2 5b 48 c7 c7 98 ec bb 82 41 5c e9 d1 18 17 ff 4c 89 e6 5b 48 c7 c7 d0 ec bb 82 41 5c e9 bf 18 17 ff 0f 0b eb 83 <0f> 0b eb 97 0f 0b eb 87 0f 0b e9 68 ff ff ff 66 66 2e 0f 1f 84 00 RSP: 0018:ffffc9000008bd90 EFLAGS: 00010206 RAX: 0000000000000300 RBX: ffff88810b172a90 RCX: 0000000000000007 RDX: 0000000000000002 RSI: 0000000000000300 RDI: ffff88810b172a00 RBP: ffff88810b172a00 R08: ffff888104273c00 R09: 0000000000100007 R10: 0000000000020000 R11: 0000000000000006 R12: ffff88810b172a00 R13: 0000000000000004 R14: 0000000000000000 R15: ffff888237c31f78 FS: 0000000000000000(0000) GS:ffff888237c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffc63fecac8 CR3: 000000000342e000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? __warn+0x88/0x130 ? inet_sock_destruct+0x1c5/0x1e0 ? report_bug+0x18e/0x1a0 ? handle_bug+0x53/0x90 ? exc_invalid_op+0x18/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? inet_sock_destruct+0x1c5/0x1e0 __sk_destruct+0x2a/0x200 rcu_do_batch+0x1aa/0x530 ? rcu_do_batch+0x13b/0x530 rcu_core+0x159/0x2f0 handle_softirqs+0xd3/0x2b0 ? __pfx_smpboot_thread_fn+0x10/0x10 run_ksoftirqd+0x25/0x30 smpboot_thread_fn+0xdd/0x1d0 kthread+0xd3/0x100 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 ---[ end trace 0000000000000000 ]--- Its possible that two threads call tcp_v6_do_rcv()/sk_forward_alloc_add() concurrently when sk->sk_state == TCP_LISTEN with sk->sk_lock unlocked, which triggers a data-race around sk->sk_forward_alloc: tcp_v6_rcv tcp_v6_do_rcv skb_clone_and_charge_r sk_rmem_schedule __sk_mem_schedule sk_forward_alloc_add() skb_set_owner_r sk_mem_charge sk_forward_alloc_add() __kfree_skb skb_release_all skb_release_head_state sock_rfree sk_mem_uncharge sk_forward_alloc_add() sk_mem_reclaim // set local var reclaimable __sk_mem_reclaim sk_forward_alloc_add() In this syzkaller testcase, two threads call tcp_v6_do_rcv() with skb->truesize=768, the sk_forward_alloc changes like this: (cpu 1) | (cpu 2) | sk_forward_alloc ... | ... | 0 __sk_mem_schedule() | | +4096 = 4096 | __sk_mem_schedule() | +4096 = 8192 sk_mem_charge() | | -768 = 7424 | sk_mem_charge() | -768 = 6656 ... | ... | sk_mem_uncharge() | | +768 = 7424 reclaimable=7424 | | | sk_mem_uncharge() | +768 = 8192 | reclaimable=8192 | __sk_mem_reclaim() | | -4096 = 4096 | __sk_mem_reclaim() | -8192 = -4096 != 0 The skb_clone_and_charge_r() should not be called in tcp_v6_do_rcv() when sk->sk_state is TCP_LISTEN, it happens later in tcp_v6_syn_recv_sock(). Fix the same issue in dccp_v6_do_rcv(). Suggested-by: Eric Dumazet Reviewed-by: Eric Dumazet Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") Signed-off-by: Wang Liang Link: https://patch.msgid.link/20241107023405.889239-1-wangliang74@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Alva Lan Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 0bb154d9658e961742fe4c741e42f28fba97801e) --- net/dccp/ipv6.c | 2 +- net/ipv6/tcp_ipv6.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index d25e962b18a53..2839ca8053ba6 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -616,7 +616,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) by tcp. Feel free to propose better solution. --ANK (980728) */ - if (np->rxopt.all) + if (np->rxopt.all && sk->sk_state != DCCP_LISTEN) opt_skb = skb_clone_and_charge_r(skb, sk); if (sk->sk_state == DCCP_OPEN) { /* Fast path */ diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 64bdb6d978eed..f285e52b8b857 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1456,7 +1456,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) by tcp. Feel free to propose better solution. --ANK (980728) */ - if (np->rxopt.all) + if (np->rxopt.all && sk->sk_state != TCP_LISTEN) opt_skb = skb_clone_and_charge_r(skb, sk); reason = SKB_DROP_REASON_NOT_SPECIFIED; @@ -1495,8 +1495,6 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (nsk != sk) { if (tcp_child_process(sk, nsk, skb)) goto reset; - if (opt_skb) - __kfree_skb(opt_skb); return 0; } } else From 2b997c3a2a06193c795056c68febfc052b62950d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 21 Jan 2025 18:45:21 +0100 Subject: [PATCH 85/85] Linux 6.6.74-rc1 (cherry picked from commit 429148729681ff93db022c19a17ce00dff9c04f9) --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index a349c26c6fba3..834033e7dec6e 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 73 -EXTRAVERSION = +SUBLEVEL = 74 +EXTRAVERSION = -rc1 NAME = Pinguïn Aangedreven # *DOCUMENTATION*