diff --git a/.github/workflows/build-check_aarch64-rt.yml b/.github/workflows/build-check_aarch64-rt.yml new file mode 100644 index 0000000000000..8b99d3dbf088a --- /dev/null +++ b/.github/workflows/build-check_aarch64-rt.yml @@ -0,0 +1,34 @@ +name: aarch64-RT CI +on: + pull_request: + branches: + - '**' + - '!mainline' + +jobs: + kernel-build-job: + runs-on: + labels: kernel-build-arm64 + container: + image: rockylinux:9 + env: + ROCKY_ENV: rocky9 + ports: + - 80 + options: --cpus 8 + steps: + - name: Install tools and Libraries + run: | + dnf groupinstall 'Development Tools' -y + dnf install --enablerepo=crb bc dwarves kernel-devel openssl-devel elfutils-libelf-devel -y + - name: Checkout code + uses: actions/checkout@v4 + with: + ref: "${{ github.event.pull_request.head.sha }}" + fetch-depth: 0 + - name: Build the Kernel + run: | + git config --global --add safe.directory /__w/kernel-src-tree/kernel-src-tree + cp configs/kernel-aarch64-rt-rhel.config .config + make olddefconfig + make -j8 diff --git a/.github/workflows/build-check_aarch64.yml b/.github/workflows/build-check_aarch64.yml new file mode 100644 index 0000000000000..66a64827a6d94 --- /dev/null +++ b/.github/workflows/build-check_aarch64.yml @@ -0,0 +1,34 @@ +name: aarch64 CI +on: + pull_request: + branches: + - '**' + - '!mainline' + +jobs: + kernel-build-job: + runs-on: + labels: kernel-build-arm64 + container: + image: rockylinux:9 + env: + ROCKY_ENV: rocky9 + ports: + - 80 + options: --cpus 8 + steps: + - name: Install tools and Libraries + run: | + dnf groupinstall 'Development Tools' -y + dnf install --enablerepo=crb bc dwarves kernel-devel openssl-devel elfutils-libelf-devel -y + - name: Checkout code + uses: actions/checkout@v4 + with: + ref: "${{ github.event.pull_request.head.sha }}" + fetch-depth: 0 + - name: Build the Kernel + run: | + git config --global --add safe.directory /__w/kernel-src-tree/kernel-src-tree + cp configs/kernel-aarch64-rhel.config .config + make olddefconfig + make -j8 diff --git a/.github/workflows/build-check_x86_64-rt.yml b/.github/workflows/build-check_x86_64-rt.yml new file mode 100644 index 0000000000000..00134eb344714 --- /dev/null +++ b/.github/workflows/build-check_x86_64-rt.yml @@ -0,0 +1,34 @@ +name: x86_64-RT CI +on: + pull_request: + branches: + - '**' + - '!mainline' + +jobs: + kernel-build-job: + runs-on: + labels: kernel-build + container: + image: rockylinux:9 + env: + ROCKY_ENV: rocky9 + ports: + - 80 + options: --cpus 8 + steps: + - name: Install tools and Libraries + run: | + dnf groupinstall 'Development Tools' -y + dnf install --enablerepo=crb bc dwarves kernel-devel openssl-devel elfutils-libelf-devel -y + - name: Checkout code + uses: actions/checkout@v4 + with: + ref: "${{ github.event.pull_request.head.sha }}" + fetch-depth: 0 + - name: Build the Kernel + run: | + git config --global --add safe.directory /__w/kernel-src-tree/kernel-src-tree + cp configs/kernel-x86_64-rt-rhel.config .config + make olddefconfig + make -j8 diff --git a/.github/workflows/build-check_x86_64.yml b/.github/workflows/build-check_x86_64.yml new file mode 100644 index 0000000000000..4c3237f39b4f5 --- /dev/null +++ b/.github/workflows/build-check_x86_64.yml @@ -0,0 +1,34 @@ +name: x86_64 CI +on: + pull_request: + branches: + - '**' + - '!mainline' + +jobs: + kernel-build-job: + runs-on: + labels: kernel-build + container: + image: rockylinux:9 + env: + ROCKY_ENV: rocky9 + ports: + - 80 + options: --cpus 8 + steps: + - name: Install tools and Libraries + run: | + dnf groupinstall 'Development 
Tools' -y + dnf install --enablerepo=crb bc dwarves kernel-devel openssl-devel elfutils-libelf-devel -y + - name: Checkout code + uses: actions/checkout@v4 + with: + ref: "${{ github.event.pull_request.head.sha }}" + fetch-depth: 0 + - name: Build the Kernel + run: | + git config --global --add safe.directory /__w/kernel-src-tree/kernel-src-tree + cp configs/kernel-x86_64-rhel.config .config + make olddefconfig + make -j8 diff --git a/COPYING-5.14.0-503.19.1.el9_5 b/COPYING-5.14.0-503.22.1.el9_5 similarity index 100% rename from COPYING-5.14.0-503.19.1.el9_5 rename to COPYING-5.14.0-503.22.1.el9_5 diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9ef367d46f38e..0e515d2209539 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4900,6 +4900,14 @@ kthread, but increases that same overhead on each group's NOCB grace-period kthread. + rcutree.nohz_full_patience_delay= [KNL] + On callback-offloaded (rcu_nocbs) CPUs, avoid + disturbing RCU unless the grace period has + reached the specified age in milliseconds. + Defaults to zero. Large values will be capped + at five seconds. All values will be rounded down + to the nearest value representable by jiffies. + rcutree.qhimark= [KNL] Set threshold of queued RCU callbacks beyond which batch limiting is disabled. diff --git a/Makefile.rhelver b/Makefile.rhelver index 99b8eb0796878..4c505f12559ae 100644 --- a/Makefile.rhelver +++ b/Makefile.rhelver @@ -12,7 +12,7 @@ RHEL_MINOR = 5 # # Use this spot to avoid future merge conflicts. # Do not trim this comment. -RHEL_RELEASE = 503.19.1 +RHEL_RELEASE = 503.22.1 # # ZSTREAM diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index f9b3adebcb187..c53795f02faad 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1429,6 +1429,7 @@ static void sve_init_regs(void) } else { fpsimd_to_sve(current); current->thread.fp_type = FP_STATE_SVE; + fpsimd_flush_task_state(current); } } diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 7f68cf58b978f..8a3f31b70baa5 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -522,14 +522,23 @@ int kvm_vgic_map_resources(struct kvm *kvm) if (ret) goto out; - dist->ready = true; dist_base = dist->vgic_dist_base; mutex_unlock(&kvm->arch.config_lock); ret = vgic_register_dist_iodev(kvm, dist_base, type); - if (ret) + if (ret) { kvm_err("Unable to register VGIC dist MMIO regions\n"); + goto out_slots; + } + /* + * kvm_io_bus_register_dev() guarantees all readers see the new MMIO + * registration before returning through synchronize_srcu(), which also + * implies a full memory barrier. As such, marking the distributor as + * 'ready' here is guaranteed to be ordered after all vCPUs having seen + * a completely configured distributor. 
+ */ + dist->ready = true; goto out_slots; out: mutex_unlock(&kvm->arch.config_lock); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 97bfead08c0fc..f9e78a2ac8463 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1088,7 +1088,7 @@ cmds(struct pt_regs *excp) memzcan(); break; case 'i': - show_mem(0, NULL); + show_mem(); break; default: termch = cmd; diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 5cc46e0dde620..9725586f42597 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -146,7 +146,7 @@ int gmap_mprotect_notify(struct gmap *, unsigned long start, void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4], unsigned long gaddr, unsigned long vmaddr); -int gmap_mark_unmergeable(void); +int s390_disable_cow_sharing(void); void s390_unlist_old_asce(struct gmap *gmap); int s390_replace_asce(struct gmap *gmap); void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns); diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 9281063636a73..d8bbcac89ac31 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 829d68e2c6858..5e7d44716cf82 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -35,6 +35,11 @@ typedef struct { unsigned int uses_cmm:1; /* The gmaps associated with this context are allowed to use huge pages. */ unsigned int allow_gmap_hpage_1m:1; + /* + * The mmu context allows COW-sharing of memory pages (KSM, zeropage). + * Note that COW-sharing during fork() is currently always allowed. + */ + RH_KABI_EXTEND(unsigned int allow_cow_sharing:1) } mm_context_t; #define INIT_MM_CONTEXT(name) \ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 2a38af5a00c2d..8df6d09e9ca87 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -36,6 +36,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.has_pgste = 0; mm->context.uses_skeys = 0; mm->context.uses_cmm = 0; + mm->context.allow_cow_sharing = 1; mm->context.allow_gmap_hpage_1m = 0; #endif switch (mm->context.asce_limit) { diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index bbcce04328fc8..bf4d0ae55c495 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -560,10 +560,20 @@ static inline pud_t set_pud_bit(pud_t pud, pgprot_t prot) } /* - * In the case that a guest uses storage keys - * faults should no longer be backed by zero pages + * As soon as the guest uses storage keys or enables PV, we deduplicate all + * mapped shared zeropages and prevent new shared zeropages from getting + * mapped. 
+ */
-#define mm_forbids_zeropage	mm_has_pgste
+#define mm_forbids_zeropage	mm_forbids_zeropage
+static inline int mm_forbids_zeropage(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+	if (!mm->context.allow_cow_sharing)
+		return 1;
+#endif
+	return 0;
+}
+
 static inline int mm_uses_skeys(struct mm_struct *mm)
 {
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index 0e7bd3873907f..b2e2f9a4163c5 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -442,7 +442,10 @@ static inline int share(unsigned long addr, u16 cmd)
 	if (!uv_call(0, (u64)&uvcb))
 		return 0;
-	return -EINVAL;
+	pr_err("%s UVC failed (rc: 0x%x, rrc: 0x%x), possible hypervisor bug.\n",
+	       uvcb.header.cmd == UVC_CMD_SET_SHARED_ACCESS ? "Share" : "Unshare",
+	       uvcb.header.rc, uvcb.header.rrc);
+	panic("System security cannot be guaranteed unless the system panics now.\n");
 }
 
 /*
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index b9cf4a30e9464..fc07bc39e6983 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -303,6 +303,8 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
 	rc = -ENXIO;
 	ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
+	if (!ptep)
+		goto out;
 	if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
 		page = pte_page(*ptep);
 		rc = -EAGAIN;
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 2a32438e09ceb..74f73141f9b96 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -77,7 +77,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 	vcpu->stat.instruction_diagnose_258++;
 	if (vcpu->run->s.regs.gprs[rx] & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
+	rc = read_guest_real(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 	if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 5bf3d94e9ddaa..2d1b9977885b5 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -985,6 +985,8 @@ static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
 	const gfn_t gfn = gpa_to_gfn(gpa);
 	int rc;
 
+	if (!gfn_to_memslot(kvm, gfn))
+		return PGM_ADDRESSING;
 	if (mode == GACC_STORE)
 		rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
 	else
@@ -1142,6 +1144,8 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
 		gra += fragment_len;
 		data += fragment_len;
 	}
+	if (rc > 0)
+		vcpu->arch.pgm.code = rc;
 	return rc;
 }
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index b320d12aa0493..3fde45a151f22 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -405,11 +405,12 @@ int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
 * @len: number of bytes to copy
 *
 * Copy @len bytes from @data (kernel space) to @gra (guest real address).
- * It is up to the caller to ensure that the entire guest memory range is
- * valid memory before calling this function.
 * Guest low address and key protection are not checked.
 *
- * Returns zero on success or -EFAULT on error.
+ * Returns zero on success, -EFAULT when copying from @data failed, or
+ * PGM_ADDRESSING in case @gra is outside a memslot. In this case, pgm check info
+ * is also stored to allow injecting into the guest (if applicable) using
+ * kvm_s390_inject_prog_cond().
 *
 * If an error occurs data may have been copied partially to guest memory.
 */
@@ -428,11 +429,12 @@ int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
 * @len: number of bytes to copy
 *
 * Copy @len bytes from @gra (guest real address) to @data (kernel space).
- * It is up to the caller to ensure that the entire guest memory range is
- * valid memory before calling this function.
 * Guest key protection is not checked.
 *
- * Returns zero on success or -EFAULT on error.
+ * Returns zero on success, -EFAULT when copying to @data failed, or
+ * PGM_ADDRESSING in case @gra is outside a memslot. In this case, pgm check info
+ * is also stored to allow injecting into the guest (if applicable) using
+ * kvm_s390_inject_prog_cond().
 *
 * If an error occurs data may have been copied partially to kernel space.
 */
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 30dad9f6f80ce..58776da19caf8 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -348,20 +348,29 @@ static inline int plo_test_bit(unsigned char nr)
 	return cc == 0;
 }
 
-static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
+static __always_inline void __sortl_query(u8 (*query)[32])
 {
 	asm volatile(
 		"	lghi	0,0\n"
-		"	lgr	1,%[query]\n"
+		"	la	1,%[query]\n"
 		/* Parameter registers are ignored */
-		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
+		"	.insn	rre,0xb9380000,2,4\n"
+		: [query] "=R" (*query)
 		:
-		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
-		: "cc", "memory", "0", "1");
+		: "cc", "0", "1");
 }
 
-#define INSN_SORTL 0xb938
-#define INSN_DFLTCC 0xb939
+static __always_inline void __dfltcc_query(u8 (*query)[32])
+{
+	asm volatile(
+		"	lghi	0,0\n"
+		"	la	1,%[query]\n"
+		/* Parameter registers are ignored */
+		"	.insn	rrf,0xb9390000,2,4,6,0\n"
+		: [query] "=R" (*query)
+		:
+		: "cc", "0", "1");
+}
 
 static void __init kvm_s390_cpu_feat_init(void)
 {
@@ -415,10 +424,10 @@ static void __init kvm_s390_cpu_feat_init(void)
 			      kvm_s390_available_subfunc.kdsa);
 
 	if (test_facility(150)) /* SORTL */
-		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
+		__sortl_query(&kvm_s390_available_subfunc.sortl);
 	if (test_facility(151)) /* DFLTCC */
-		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
+		__dfltcc_query(&kvm_s390_available_subfunc.dfltcc);
 
 	if (MACHINE_HAS_ESOP)
 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
@@ -2630,9 +2639,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
 		if (r)
 			break;
 
-		mmap_write_lock(current->mm);
-		r = gmap_mark_unmergeable();
-		mmap_write_unlock(current->mm);
+		r = s390_disable_cow_sharing();
 		if (r)
 			break;
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index b2c9f010f0fef..c9ecae830634f 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -361,7 +362,7 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	case -EACCES:
 		return set_validity_icpt(scb_s, 0x003CU);
 	}
-	scb_s->crycbd = ((__u32)(__u64) &vsie_page->crycb) | CRYCB_FORMAT2;
+	scb_s->crycbd = (u32)virt_to_phys(&vsie_page->crycb) | CRYCB_FORMAT2;
 	return 0;
 }
@@ -1005,7 +1006,7 @@ static int handle_stfle(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		if (read_guest_real(vcpu, fac, &vsie_page->fac, stfle_size() * sizeof(u64)))
 			return set_validity_icpt(scb_s, 0x1090U);
-		scb_s->fac = (__u32)(__u64) &vsie_page->fac;
+		scb_s->fac = (u32)virt_to_phys(&vsie_page->fac);
 	}
 	return 0;
 }
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 405f3ce1e6a4a..8b21c3576ad90 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2547,34 +2547,6 @@ static inline void thp_split_mm(struct mm_struct *mm)
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-/*
- * Remove all empty zero pages from the mapping for lazy refaulting
- * - This must be called after mm->context.has_pgste is set, to avoid
- *   future creation of zero pages
- * - This must be called after THP was enabled
- */
-static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
-			    unsigned long end, struct mm_walk *walk)
-{
-	unsigned long addr;
-
-	for (addr = start; addr != end; addr += PAGE_SIZE) {
-		pte_t *ptep;
-		spinlock_t *ptl;
-
-		ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
-		if (is_zero_pfn(pte_pfn(*ptep)))
-			ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
-		pte_unmap_unlock(ptep, ptl);
-	}
-	return 0;
-}
-
-static const struct mm_walk_ops zap_zero_walk_ops = {
-	.pmd_entry	= __zap_zero_pages,
-	.walk_lock	= PGWALK_WRLOCK,
-};
-
 /*
  * switch on pgstes for its userspace process (for kvm)
  */
@@ -2592,22 +2564,142 @@ int s390_enable_sie(void)
 	mm->context.has_pgste = 1;
 	/* split thp mappings and disable thp for future mappings */
 	thp_split_mm(mm);
-	walk_page_range(mm, 0, TASK_SIZE, &zap_zero_walk_ops, NULL);
 	mmap_write_unlock(mm);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(s390_enable_sie);
 
-int gmap_mark_unmergeable(void)
+static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
+				   unsigned long end, struct mm_walk *walk)
+{
+	unsigned long *found_addr = walk->private;
+
+	/* Return 1 if the page is a zeropage. */
+	if (is_zero_pfn(pte_pfn(*pte))) {
+		/*
+		 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
+		 * right thing and likely don't care: FAULT_FLAG_UNSHARE
+		 * currently only works in COW mappings, which is also where
+		 * mm_forbids_zeropage() is checked.
+		 */
+		if (!is_cow_mapping(walk->vma->vm_flags))
+			return -EFAULT;
+
+		*found_addr = addr;
+		return 1;
+	}
+	return 0;
+}
+
+static const struct mm_walk_ops find_zeropage_ops = {
+	.pte_entry	= find_zeropage_pte_entry,
+	.walk_lock	= PGWALK_WRLOCK,
+};
+
+/*
+ * Unshare all shared zeropages, replacing them by anonymous pages. Note that
+ * we cannot simply zap all shared zeropages, because this could later
+ * trigger unexpected userfaultfd missing events.
+ *
+ * This must be called after mm->context.allow_cow_sharing was
+ * set to 0, to avoid future mappings of shared zeropages.
+ *
+ * mm contracts with s390, that even if mm were to remove a page table,
+ * and racing with walk_page_range_vma() calling pte_offset_map_lock()
+ * would fail, it will never insert a page table containing empty zero
+ * pages once mm_forbids_zeropage(mm) i.e.
+ * mm->context.allow_cow_sharing is set to 0.
+ */
+static int __s390_unshare_zeropages(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
+	unsigned long addr;
+	vm_fault_t fault;
+	int rc;
+
+	for_each_vma(vmi, vma) {
+		/*
+		 * We could only look at COW mappings, but it's more future
+		 * proof to catch unexpected zeropages in other mappings and
+		 * fail.
+		 */
+		if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
+			continue;
+		addr = vma->vm_start;
+
+retry:
+		rc = walk_page_range_vma(vma, addr, vma->vm_end,
+					 &find_zeropage_ops, &addr);
+		if (rc < 0)
+			return rc;
+		else if (!rc)
+			continue;
+
+		/* addr was updated by find_zeropage_pte_entry() */
+		fault = handle_mm_fault(vma, addr,
+					FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
+					NULL);
+		if (fault & VM_FAULT_OOM)
+			return -ENOMEM;
+		/*
+		 * See break_ksm(): even after handle_mm_fault() returned 0, we
+		 * must start the lookup from the current address, because
+		 * handle_mm_fault() may back out if there's any difficulty.
+		 *
+		 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
+		 * maybe they could trigger in the future on concurrent
+		 * truncation. In that case, the shared zeropage would be gone
+		 * and we can simply retry and make progress.
+		 */
+		cond_resched();
+		goto retry;
+	}
+
+	return 0;
+}
+
+static int __s390_disable_cow_sharing(struct mm_struct *mm)
 {
+	int rc;
+
+	if (!mm->context.allow_cow_sharing)
+		return 0;
+
+	mm->context.allow_cow_sharing = 0;
+
+	/* Replace all shared zeropages by anonymous pages. */
+	rc = __s390_unshare_zeropages(mm);
 	/*
 	 * Make sure to disable KSM (if enabled for the whole process or
 	 * individual VMAs). Note that nothing currently hinders user space
 	 * from re-enabling it.
 	 */
-	return ksm_disable(current->mm);
+	if (!rc)
+		rc = ksm_disable(mm);
+	if (rc)
+		mm->context.allow_cow_sharing = 1;
+	return rc;
+}
+
+/*
+ * Disable most COW-sharing of memory pages for the whole process:
+ * (1) Disable KSM and unmerge/unshare any KSM pages.
+ * (2) Disallow shared zeropages and unshare any zeropages that are mapped.
+ *
+ * Note that we currently don't bother with COW-shared pages that are shared
+ * with parent/child processes due to fork().
+ */ +int s390_disable_cow_sharing(void) +{ + int rc; + + mmap_write_lock(current->mm); + rc = __s390_disable_cow_sharing(current->mm); + mmap_write_unlock(current->mm); + return rc; } -EXPORT_SYMBOL_GPL(gmap_mark_unmergeable); +EXPORT_SYMBOL_GPL(s390_disable_cow_sharing); /* * Enable storage key handling from now on and initialize the storage @@ -2676,7 +2768,7 @@ int s390_enable_skey(void) goto out_up; mm->context.uses_skeys = 1; - rc = gmap_mark_unmergeable(); + rc = __s390_disable_cow_sharing(mm); if (rc) { mm->context.uses_skeys = 0; goto out_up; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index a293541a7e436..99422926efe1b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -820,7 +820,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, default: return -EFAULT; } - +again: ptl = pmd_lock(mm, pmdp); if (!pmd_present(*pmdp)) { spin_unlock(ptl); @@ -841,6 +841,8 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, spin_unlock(ptl); ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); + if (!ptep) + goto again; new = old = pgste_get_lock(ptep); pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | PGSTE_ACC_BITS | PGSTE_FP_BIT); @@ -929,7 +931,7 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) default: return -EFAULT; } - +again: ptl = pmd_lock(mm, pmdp); if (!pmd_present(*pmdp)) { spin_unlock(ptl); @@ -946,6 +948,8 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) spin_unlock(ptl); ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); + if (!ptep) + goto again; new = old = pgste_get_lock(ptep); /* Reset guest reference bit only */ pgste_val(new) &= ~PGSTE_GR_BIT; @@ -991,7 +995,7 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, default: return -EFAULT; } - +again: ptl = pmd_lock(mm, pmdp); if (!pmd_present(*pmdp)) { spin_unlock(ptl); @@ -1008,6 +1012,8 @@ int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, spin_unlock(ptl); ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); + if (!ptep) + goto again; pgste = pgste_get_lock(ptep); *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; paddr = pte_val(*ptep) & PAGE_MASK; diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index c8e0dd99f3700..bfd156eef91c2 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -83,7 +83,7 @@ static void prom_sync_me(void) "nop\n\t" : : "r" (&trapbase)); prom_printf("PROM SYNC COMMAND...\n"); - show_free_areas(0, NULL); + show_mem(); if (!is_idle_task(current)) { local_irq_enable(); ksys_sync(); diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 7a68854f4c17c..d5bea196a724c 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -118,6 +118,54 @@ struct snp_req_data { unsigned int data_npages; }; +#define MAX_AUTHTAG_LEN 32 + +/* See SNP spec SNP_GUEST_REQUEST section for the structure */ +enum msg_type { + SNP_MSG_TYPE_INVALID = 0, + SNP_MSG_CPUID_REQ, + SNP_MSG_CPUID_RSP, + SNP_MSG_KEY_REQ, + SNP_MSG_KEY_RSP, + SNP_MSG_REPORT_REQ, + SNP_MSG_REPORT_RSP, + SNP_MSG_EXPORT_REQ, + SNP_MSG_EXPORT_RSP, + SNP_MSG_IMPORT_REQ, + SNP_MSG_IMPORT_RSP, + SNP_MSG_ABSORB_REQ, + SNP_MSG_ABSORB_RSP, + SNP_MSG_VMRK_REQ, + SNP_MSG_VMRK_RSP, + + SNP_MSG_TYPE_MAX +}; + +enum aead_algo { + SNP_AEAD_INVALID, + SNP_AEAD_AES_256_GCM, +}; + +struct snp_guest_msg_hdr { + u8 authtag[MAX_AUTHTAG_LEN]; + u64 msg_seqno; + u8 rsvd1[8]; + u8 algo; + u8 hdr_version; + u16 hdr_sz; + u8 
msg_type; + u8 msg_version; + u16 msg_sz; + u32 rsvd2; + u8 msg_vmpck; + u8 rsvd3[35]; +} __packed; + +struct snp_guest_msg { + struct snp_guest_msg_hdr hdr; + u8 payload[4000]; +} __packed; + struct sev_guest_platform_data { u64 secrets_gpa; }; diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 2bce661fc569c..5a6afda377188 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -326,6 +327,78 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) sev_decommission(handle); } +/* + * This sets up bounce buffers/firmware pages to handle SNP Guest Request + * messages (e.g. attestation requests). See "SNP Guest Request" in the GHCB + * 2.0 specification for more details. + * + * Technically, when an SNP Guest Request is issued, the guest will provide its + * own request/response pages, which could in theory be passed along directly + * to firmware rather than using bounce pages. However, these pages would need + * special care: + * + * - Both pages are from shared guest memory, so they need to be protected + * from migration/etc. occurring while firmware reads/writes to them. At a + * minimum, this requires elevating the ref counts and potentially needing + * an explicit pinning of the memory. This places additional restrictions + * on what type of memory backends userspace can use for shared guest + * memory since there is some reliance on using refcounted pages. + * + * - The response page needs to be switched to Firmware-owned[1] state + * before the firmware can write to it, which can lead to potential + * host RMP #PFs if the guest is misbehaved and hands the host a + * guest page that KVM might write to for other reasons (e.g. virtio + * buffers/etc.). + * + * Both of these issues can be avoided completely by using separately-allocated + * bounce pages for both the request/response pages and passing those to + * firmware instead. So that's what is being set up here. + * + * Guest requests rely on message sequence numbers to ensure requests are + * issued to firmware in the order the guest issues them, so concurrent guest + * requests generally shouldn't happen. But a misbehaved guest could issue + * concurrent guest requests in theory, so a mutex is used to serialize + * access to the bounce buffers. + * + * [1] See the "Page States" section of the SEV-SNP Firmware ABI for more + * details on Firmware-owned pages, along with "RMP and VMPL Access Checks" + * in the APM for details on the related RMP restrictions. 
+ */ +static int snp_guest_req_init(struct kvm *kvm) +{ + struct kvm_sev_info *sev = to_kvm_sev_info(kvm); + struct page *req_page; + + req_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); + if (!req_page) + return -ENOMEM; + + sev->guest_resp_buf = snp_alloc_firmware_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); + if (!sev->guest_resp_buf) { + __free_page(req_page); + return -EIO; + } + + sev->guest_req_buf = page_address(req_page); + mutex_init(&sev->guest_req_mutex); + + return 0; +} + +static void snp_guest_req_cleanup(struct kvm *kvm) +{ + struct kvm_sev_info *sev = to_kvm_sev_info(kvm); + + if (sev->guest_resp_buf) + snp_free_firmware_page(sev->guest_resp_buf); + + if (sev->guest_req_buf) + __free_page(virt_to_page(sev->guest_req_buf)); + + sev->guest_req_buf = NULL; + sev->guest_resp_buf = NULL; +} + static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp, struct kvm_sev_init *data, unsigned long vm_type) @@ -376,6 +449,13 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp, if (ret) goto e_free; + /* This needs to happen after SEV/SNP firmware initialization. */ + if (vm_type == KVM_X86_SNP_VM) { + ret = snp_guest_req_init(kvm); + if (ret) + goto e_free; + } + INIT_LIST_HEAD(&sev->regions_list); INIT_LIST_HEAD(&sev->mirror_vms); sev->need_init = false; @@ -2843,6 +2923,8 @@ void sev_vm_destroy(struct kvm *kvm) } if (sev_snp_guest(kvm)) { + snp_guest_req_cleanup(kvm); + /* * Decomission handles unbinding of the ASID. If it fails for * some unexpected reason, just leak the ASID. @@ -3314,6 +3396,14 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) if (!sev_snp_guest(vcpu->kvm) || !kvm_ghcb_sw_scratch_is_valid(svm)) goto vmgexit_err; break; + case SVM_VMGEXIT_GUEST_REQUEST: + case SVM_VMGEXIT_EXT_GUEST_REQUEST: + if (!sev_snp_guest(vcpu->kvm) || + !PAGE_ALIGNED(control->exit_info_1) || + !PAGE_ALIGNED(control->exit_info_2) || + control->exit_info_1 == control->exit_info_2) + goto vmgexit_err; + break; default: reason = GHCB_ERR_INVALID_EVENT; goto vmgexit_err; @@ -3932,6 +4022,103 @@ static int sev_snp_ap_creation(struct vcpu_svm *svm) return ret; } +static int snp_handle_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa) +{ + struct sev_data_snp_guest_request data = {0}; + struct kvm *kvm = svm->vcpu.kvm; + struct kvm_sev_info *sev = to_kvm_sev_info(kvm); + sev_ret_code fw_err = 0; + int ret; + + if (!sev_snp_guest(kvm)) + return -EINVAL; + + mutex_lock(&sev->guest_req_mutex); + + if (kvm_read_guest(kvm, req_gpa, sev->guest_req_buf, PAGE_SIZE)) { + ret = -EIO; + goto out_unlock; + } + + data.gctx_paddr = __psp_pa(sev->snp_context); + data.req_paddr = __psp_pa(sev->guest_req_buf); + data.res_paddr = __psp_pa(sev->guest_resp_buf); + + /* + * Firmware failures are propagated on to guest, but any other failure + * condition along the way should be reported to userspace. E.g. if + * the PSP is dead and commands are timing out. 
+ */ + ret = sev_issue_cmd(kvm, SEV_CMD_SNP_GUEST_REQUEST, &data, &fw_err); + if (ret && !fw_err) + goto out_unlock; + + if (kvm_write_guest(kvm, resp_gpa, sev->guest_resp_buf, PAGE_SIZE)) { + ret = -EIO; + goto out_unlock; + } + + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, SNP_GUEST_ERR(0, fw_err)); + + ret = 1; /* resume guest */ + +out_unlock: + mutex_unlock(&sev->guest_req_mutex); + return ret; +} + +static int snp_handle_ext_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp_gpa) +{ + struct kvm *kvm = svm->vcpu.kvm; + u8 msg_type; + + if (!sev_snp_guest(kvm)) + return -EINVAL; + + if (kvm_read_guest(kvm, req_gpa + offsetof(struct snp_guest_msg_hdr, msg_type), + &msg_type, 1)) + return -EIO; + + /* + * As per GHCB spec, requests of type MSG_REPORT_REQ also allow for + * additional certificate data to be provided alongside the attestation + * report via the guest-provided data pages indicated by RAX/RBX. The + * certificate data is optional and requires additional KVM enablement + * to provide an interface for userspace to provide it, but KVM still + * needs to be able to handle extended guest requests either way. So + * provide a stub implementation that will always return an empty + * certificate table in the guest-provided data pages. + */ + if (msg_type == SNP_MSG_REPORT_REQ) { + struct kvm_vcpu *vcpu = &svm->vcpu; + u64 data_npages; + gpa_t data_gpa; + + if (!kvm_ghcb_rax_is_valid(svm) || !kvm_ghcb_rbx_is_valid(svm)) + goto request_invalid; + + data_gpa = vcpu->arch.regs[VCPU_REGS_RAX]; + data_npages = vcpu->arch.regs[VCPU_REGS_RBX]; + + if (!PAGE_ALIGNED(data_gpa)) + goto request_invalid; + + /* + * As per GHCB spec (see "SNP Extended Guest Request"), the + * certificate table is terminated by 24-bytes of zeroes. + */ + if (data_npages && kvm_clear_guest(kvm, data_gpa, 24)) + return -EIO; + } + + return snp_handle_guest_req(svm, req_gpa, resp_gpa); + +request_invalid: + ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2); + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_INPUT); + return 1; /* resume guest */ +} + static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -4206,6 +4393,12 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) ret = 1; break; + case SVM_VMGEXIT_GUEST_REQUEST: + ret = snp_handle_guest_req(svm, control->exit_info_1, control->exit_info_2); + break; + case SVM_VMGEXIT_EXT_GUEST_REQUEST: + ret = snp_handle_ext_guest_req(svm, control->exit_info_1, control->exit_info_2); + break; case SVM_VMGEXIT_UNSUPPORTED_EVENT: vcpu_unimpl(vcpu, "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 6a3079e677254..57ff79bc02a40 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -95,6 +95,9 @@ struct kvm_sev_info { struct misc_cg *misc_cg; /* For misc cgroup accounting */ atomic_t migration_in_progress; void *snp_context; /* SNP guest context page */ + void *guest_req_buf; /* Bounce buffer for SNP Guest Request input */ + void *guest_resp_buf; /* Bounce buffer for SNP Guest Request output */ + struct mutex guest_req_mutex; /* Must acquire before using bounce buffers */ }; struct kvm_svm { diff --git a/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/527ed4f7.failed b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/527ed4f7.failed new file mode 100644 index 0000000000000..3a6a00fcab076 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/527ed4f7.failed @@ -0,0 +1,121 @@ +mm: remove 
arguments of show_mem() + +jira LE-2290 +Rebuild_History Non-Buildable kernel-5.14.0-503.21.1.el9_5 +commit-author Kefeng Wang +commit 527ed4f7d902d362471a93e1a4afb604c18ceb48 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/527ed4f7.failed + +All callers of show_mem() pass 0 and NULL, so we can remove the two +arguments by directly calling __show_mem(0, NULL, MAX_NR_ZONES - 1) in +show_mem(). + +Link: https://lkml.kernel.org/r/20230630062253.189440-1-wangkefeng.wang@huawei.com + Signed-off-by: Kefeng Wang + Cc: Christophe Leroy + Cc: Greg Kroah-Hartman + Cc: Matthew Wilcox + Cc: Michael Ellerman + Cc: Nicholas Piggin + Signed-off-by: Andrew Morton +(cherry picked from commit 527ed4f7d902d362471a93e1a4afb604c18ceb48) + Signed-off-by: Jonathan Maple + +# Conflicts: +# init/initramfs.c +diff --cc init/initramfs.c +index a09965934876,8d0fd946cdd2..000000000000 +--- a/init/initramfs.c ++++ b/init/initramfs.c +@@@ -50,15 -60,8 +50,20 @@@ static void __init error(char *x + message = x; + } + +++<<<<<<< HEAD + +static void panic_show_mem(const char *fmt, ...) + +{ + + va_list args; + + + + show_mem(0, NULL); + + va_start(args, fmt); + + panic(fmt, args); + + va_end(args); + +} +++======= ++ #define panic_show_mem(fmt, ...) \ ++ ({ show_mem(); panic(fmt, ##__VA_ARGS__); }) +++>>>>>>> 527ed4f7d902 (mm: remove arguments of show_mem()) + + /* link hash */ + +diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c +index 97bfead08c0f..f9e78a2ac846 100644 +--- a/arch/powerpc/xmon/xmon.c ++++ b/arch/powerpc/xmon/xmon.c +@@ -1088,7 +1088,7 @@ cmds(struct pt_regs *excp) + memzcan(); + break; + case 'i': +- show_mem(0, NULL); ++ show_mem(); + break; + default: + termch = cmd; +diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c +index 1bbbe490caec..7016d23cf4bc 100644 +--- a/drivers/tty/sysrq.c ++++ b/drivers/tty/sysrq.c +@@ -342,7 +342,7 @@ static const struct sysrq_key_op sysrq_ftrace_dump_op = { + + static void sysrq_handle_showmem(int key) + { +- show_mem(0, NULL); ++ show_mem(); + } + static const struct sysrq_key_op sysrq_showmem_op = { + .handler = sysrq_handle_showmem, +diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c +index 4ab925a05454..6e8eeffbdae4 100644 +--- a/drivers/tty/vt/keyboard.c ++++ b/drivers/tty/vt/keyboard.c +@@ -606,7 +606,7 @@ static void fn_scroll_back(struct vc_data *vc) + + static void fn_show_mem(struct vc_data *vc) + { +- show_mem(0, NULL); ++ show_mem(); + } + + static void fn_show_state(struct vc_data *vc) +diff --git a/include/linux/mm.h b/include/linux/mm.h +index d36af5babad1..d2e33464ce75 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -3046,9 +3046,9 @@ extern void mem_init(void); + extern void __init mmap_init(void); + + extern void __show_mem(unsigned int flags, nodemask_t *nodemask, int max_zone_idx); +-static inline void show_mem(unsigned int flags, nodemask_t *nodemask) ++static inline void show_mem(void) + { +- __show_mem(flags, nodemask, MAX_NR_ZONES - 1); ++ __show_mem(0, NULL, MAX_NR_ZONES - 1); + } + extern long si_mem_available(void); + extern void si_meminfo(struct sysinfo * val); +* Unmerged path init/initramfs.c +diff --git a/kernel/panic.c b/kernel/panic.c +index 638b57d50a4f..54493e35519a 100644 +--- a/kernel/panic.c ++++ b/kernel/panic.c +@@ -215,7 +215,7 @@ static void panic_print_sys_info(bool console_flush) + show_state(); + + if (panic_print & PANIC_PRINT_MEM_INFO) +- 
show_mem(0, NULL); ++ show_mem(); + + if (panic_print & PANIC_PRINT_TIMER_INFO) + sysrq_timer_list_show(); diff --git a/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/5de1fce3.failed b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/5de1fce3.failed new file mode 100644 index 0000000000000..cb18a6596c324 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/5de1fce3.failed @@ -0,0 +1,100 @@ +bnxt_en: Add support for user configured RSS key + +jira LE-2290 +Rebuild_History Non-Buildable kernel-5.14.0-503.21.1.el9_5 +commit-author Pavan Chebbi +commit 5de1fce3369564ca6b9eed339838c51ec6290270 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/5de1fce3.failed + +Store the user configured or generated Toeplitz key in +bp->rss_hash_key. The key stays constant across ifdown/ifup +unless updated by the user. + + Signed-off-by: Pavan Chebbi + Signed-off-by: Michael Chan +Link: https://lore.kernel.org/r/20240205223202.25341-12-michael.chan@broadcom.com + Signed-off-by: Jakub Kicinski +(cherry picked from commit 5de1fce3369564ca6b9eed339838c51ec6290270) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/net/ethernet/broadcom/bnxt/bnxt.c +diff --cc drivers/net/ethernet/broadcom/bnxt/bnxt.c +index e15f15706158,b66002171e36..000000000000 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@@ -4177,10 -4242,32 +4177,39 @@@ static void bnxt_init_vnics(struct bnx + vnic->fw_l2_ctx_id = INVALID_HW_RING_ID; + + if (bp->vnic_info[i].rss_hash_key) { +++<<<<<<< HEAD + + if (i == 0) + + get_random_bytes(vnic->rss_hash_key, + + HW_HASH_KEY_SIZE); + + else +++======= ++ if (!i) { ++ u8 *key = (void *)vnic->rss_hash_key; ++ int k; ++ ++ if (!bp->rss_hash_key_valid && ++ !bp->rss_hash_key_updated) { ++ get_random_bytes(bp->rss_hash_key, ++ HW_HASH_KEY_SIZE); ++ bp->rss_hash_key_updated = true; ++ } ++ ++ memcpy(vnic->rss_hash_key, bp->rss_hash_key, ++ HW_HASH_KEY_SIZE); ++ ++ if (!bp->rss_hash_key_updated) ++ continue; ++ ++ bp->rss_hash_key_updated = false; ++ bp->rss_hash_key_valid = true; ++ ++ bp->toeplitz_prefix = 0; ++ for (k = 0; k < 8; k++) { ++ bp->toeplitz_prefix <<= 8; ++ bp->toeplitz_prefix |= key[k]; ++ } ++ } else { +++>>>>>>> 5de1fce33695 (bnxt_en: Add support for user configured RSS key) + memcpy(vnic->rss_hash_key, + bp->vnic_info[0].rss_hash_key, + HW_HASH_KEY_SIZE); +* Unmerged path drivers/net/ethernet/broadcom/bnxt/bnxt.c +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h +index 936229d39a0f..f236f09e93be 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h +@@ -2164,6 +2164,10 @@ struct bnxt { + #define BNXT_RSS_CAP_NEW_RSS_CAP BIT(2) + #define BNXT_RSS_CAP_RSS_TCAM BIT(3) + ++ u8 rss_hash_key[HW_HASH_KEY_SIZE]; ++ u8 rss_hash_key_valid:1; ++ u8 rss_hash_key_updated:1; ++ + u16 max_mtu; + u16 tso_max_segs; + u8 max_tc; +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +index 514f8ae1ae6f..5c8e044cfc3b 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +@@ -1361,8 +1361,10 @@ static int bnxt_set_rxfh(struct net_device *dev, + if (rxfh->hfunc && rxfh->hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + +- if (rxfh->key) +- return -EOPNOTSUPP; ++ if (rxfh->key) { ++ 
memcpy(bp->rss_hash_key, rxfh->key, HW_HASH_KEY_SIZE); ++ bp->rss_hash_key_updated = true; ++ } + + if (rxfh->indir) { + u32 i, pad, tbl_size = bnxt_get_rxfh_indir_size(dev); diff --git a/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/7c6f714d.failed b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/7c6f714d.failed new file mode 100644 index 0000000000000..2d042a61f5eff --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/7c6f714d.failed @@ -0,0 +1,124 @@ +gfs2: Fix unlinked inode cleanup + +jira LE-2290 +Rebuild_History Non-Buildable kernel-5.14.0-503.21.1.el9_5 +commit-author Andreas Gruenbacher +commit 7c6f714d88475ceae5342264858a641eafa19632 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/7c6f714d.failed + +Before commit f0e56edc2ec7 ("gfs2: Split the two kinds of glock "delete" +work"), function delete_work_func() was used to trigger the eviction of +in-memory inodes from remote as well as deleting unlinked inodes at a +later point. These two kinds of work were then split into two kinds of +work, and the two places in the code were deferred deletion of inodes is +required accidentally ended up queuing the wrong kind of work. This +caused unlinked inodes to be left behind, which could in the worst case +fill up filesystems and require a filesystem check to recover. + +Fix that by queuing the right kind of work in try_rgrp_unlink() and +gfs2_drop_inode(). + +Fixes: f0e56edc2ec7 ("gfs2: Split the two kinds of glock "delete" work") + Signed-off-by: Andreas Gruenbacher +(cherry picked from commit 7c6f714d88475ceae5342264858a641eafa19632) + Signed-off-by: Jonathan Maple + +# Conflicts: +# fs/gfs2/glock.h +diff --cc fs/gfs2/glock.h +index 944705747cd3,63e101d448e9..000000000000 +--- a/fs/gfs2/glock.h ++++ b/fs/gfs2/glock.h +@@@ -265,28 -242,28 +265,42 @@@ static inline int gfs2_glock_nq_init(st + return error; + } + +++<<<<<<< HEAD + +extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); + +extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret); + +extern bool gfs2_queue_try_to_evict(struct gfs2_glock *gl); + +extern void gfs2_cancel_delete_work(struct gfs2_glock *gl); + +extern void gfs2_flush_delete_work(struct gfs2_sbd *sdp); + +extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); + +extern void gfs2_gl_dq_holders(struct gfs2_sbd *sdp); + +extern void gfs2_glock_thaw(struct gfs2_sbd *sdp); + +extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl); + +extern void gfs2_glock_free(struct gfs2_glock *gl); + +extern void gfs2_glock_free_later(struct gfs2_glock *gl); +++======= ++ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); ++ void gfs2_glock_complete(struct gfs2_glock *gl, int ret); ++ bool gfs2_queue_try_to_evict(struct gfs2_glock *gl); ++ bool gfs2_queue_verify_delete(struct gfs2_glock *gl, bool later); ++ void gfs2_cancel_delete_work(struct gfs2_glock *gl); ++ void gfs2_flush_delete_work(struct gfs2_sbd *sdp); ++ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); ++ void gfs2_gl_dq_holders(struct gfs2_sbd *sdp); ++ void gfs2_glock_thaw(struct gfs2_sbd *sdp); ++ void gfs2_glock_free(struct gfs2_glock *gl); ++ void gfs2_glock_free_later(struct gfs2_glock *gl); +++>>>>>>> 7c6f714d8847 (gfs2: Fix unlinked inode cleanup) + + -int __init gfs2_glock_init(void); + -void gfs2_glock_exit(void); + +extern int __init gfs2_glock_init(void); + +extern void gfs2_glock_exit(void); + + -void 
gfs2_create_debugfs_file(struct gfs2_sbd *sdp); + -void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); + -void gfs2_register_debugfs(void); + -void gfs2_unregister_debugfs(void); + +extern void gfs2_create_debugfs_file(struct gfs2_sbd *sdp); + +extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); + +extern void gfs2_register_debugfs(void); + +extern void gfs2_unregister_debugfs(void); + + -void glock_set_object(struct gfs2_glock *gl, void *object); + -void glock_clear_object(struct gfs2_glock *gl, void *object); + +extern void glock_set_object(struct gfs2_glock *gl, void *object); + +extern void glock_clear_object(struct gfs2_glock *gl, void *object); + + extern const struct lm_lockops gfs2_dlm_ops; + +diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c +index 49ea756972ba..d7deca54d1f5 100644 +--- a/fs/gfs2/glock.c ++++ b/fs/gfs2/glock.c +@@ -1024,7 +1024,7 @@ bool gfs2_queue_try_to_evict(struct gfs2_glock *gl) + &gl->gl_delete, 0); + } + +-static bool gfs2_queue_verify_delete(struct gfs2_glock *gl, bool later) ++bool gfs2_queue_verify_delete(struct gfs2_glock *gl, bool later) + { + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + unsigned long delay; +* Unmerged path fs/gfs2/glock.h +diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c +index c79c74121ec0..7df5f97de167 100644 +--- a/fs/gfs2/rgrp.c ++++ b/fs/gfs2/rgrp.c +@@ -1881,7 +1881,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip + */ + ip = gl->gl_object; + +- if (ip || !gfs2_queue_try_to_evict(gl)) ++ if (ip || !gfs2_queue_verify_delete(gl, false)) + gfs2_glock_put(gl); + else + found++; +diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c +index 0812e1b86fed..406ce2690606 100644 +--- a/fs/gfs2/super.c ++++ b/fs/gfs2/super.c +@@ -1053,7 +1053,7 @@ static int gfs2_drop_inode(struct inode *inode) + struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; + + gfs2_glock_hold(gl); +- if (!gfs2_queue_try_to_evict(gl)) ++ if (!gfs2_queue_verify_delete(gl, true)) + gfs2_glock_put_async(gl); + return 0; + } diff --git a/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/8c21c2c7.failed b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/8c21c2c7.failed new file mode 100644 index 0000000000000..2e2b066384547 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/8c21c2c7.failed @@ -0,0 +1,89 @@ +gfs2: Call gfs2_queue_verify_delete from gfs2_evict_inode + +jira LE-2290 +Rebuild_History Non-Buildable kernel-5.14.0-503.21.1.el9_5 +commit-author Andreas Gruenbacher +commit 8c21c2c71e668a5eed9fe9981a2306f9178e6c3e +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/8c21c2c7.failed + +Move calls to gfs2_queue_verify_delete() into gfs2_evict_inode(). 
+ + Signed-off-by: Andreas Gruenbacher +(cherry picked from commit 8c21c2c71e668a5eed9fe9981a2306f9178e6c3e) + Signed-off-by: Jonathan Maple + +# Conflicts: +# fs/gfs2/super.c +diff --cc fs/gfs2/super.c +index 245ffcb2e060,340bc21de218..000000000000 +--- a/fs/gfs2/super.c ++++ b/fs/gfs2/super.c +@@@ -1505,10 -1498,17 +1505,24 @@@ static void gfs2_evict_inode(struct ino + goto out; + + gfs2_holder_mark_uninitialized(&gh); +++<<<<<<< HEAD + + ret = evict_should_delete(inode, &gh); + + if (ret == SHOULD_DEFER_EVICTION) + + goto out; + + if (ret == SHOULD_DELETE_DINODE) +++======= ++ behavior = evict_should_delete(inode, &gh); ++ if (behavior == EVICT_SHOULD_DEFER_DELETE && ++ !test_bit(SDF_KILL, &sdp->sd_flags)) { ++ struct gfs2_glock *io_gl = ip->i_iopen_gh.gh_gl; ++ ++ gfs2_glock_hold(io_gl); ++ if (!gfs2_queue_verify_delete(io_gl, true)) ++ gfs2_glock_put(io_gl); ++ goto out; ++ } ++ if (behavior == EVICT_SHOULD_DELETE) +++>>>>>>> 8c21c2c71e66 (gfs2: Call gfs2_queue_verify_delete from gfs2_evict_inode) + ret = evict_unlinked_inode(inode); + else + ret = evict_linked_inode(inode); +diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c +index 65d0f1b4bee1..737729a2c99a 100644 +--- a/fs/gfs2/glock.c ++++ b/fs/gfs2/glock.c +@@ -971,10 +971,9 @@ static void gfs2_glock_poke(struct gfs2_glock *gl) + gfs2_holder_uninit(&gh); + } + +-static bool gfs2_try_evict(struct gfs2_glock *gl) ++static void gfs2_try_evict(struct gfs2_glock *gl) + { + struct gfs2_inode *ip; +- bool evicted = false; + + /* + * If there is contention on the iopen glock and we have an inode, try +@@ -1009,9 +1008,7 @@ static bool gfs2_try_evict(struct gfs2_glock *gl) + gfs2_glock_poke(ip->i_gl); + iput(&ip->i_inode); + } +- evicted = !ip; + } +- return evicted; + } + + bool gfs2_queue_try_to_evict(struct gfs2_glock *gl) +@@ -1060,13 +1057,7 @@ static void delete_work_func(struct work_struct *work) + * care about compatibility with such nodes, we can skip this + * step entirely. + */ +- if (gfs2_try_evict(gl)) { +- if (!test_bit(SDF_KILL, &sdp->sd_flags)) { +- gfs2_glock_hold(gl); +- if (!gfs2_queue_verify_delete(gl, true)) +- gfs2_glock_put(gl); +- } +- } ++ gfs2_try_evict(gl); + } + + if (verify_delete) { +* Unmerged path fs/gfs2/super.c diff --git a/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/a6033333.failed b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/a6033333.failed new file mode 100644 index 0000000000000..ee026db16eb5d --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/a6033333.failed @@ -0,0 +1,80 @@ +gfs2: Update to the evict / remote delete documentation + +jira LE-2290 +Rebuild_History Non-Buildable kernel-5.14.0-503.21.1.el9_5 +commit-author Andreas Gruenbacher +commit a6033333ccce01ecada39b3ddabc03fd967e60c0 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/a6033333.failed + +Try to be a bit more clear and remove some duplications. We cannot +actually get rid of the verification step eventually, so remove the +comment saying so. 
+ + Signed-off-by: Andreas Gruenbacher +(cherry picked from commit a6033333ccce01ecada39b3ddabc03fd967e60c0) + Signed-off-by: Jonathan Maple + +# Conflicts: +# fs/gfs2/glock.c +diff --cc fs/gfs2/glock.c +index 65d0f1b4bee1,8fff36846145..000000000000 +--- a/fs/gfs2/glock.c ++++ b/fs/gfs2/glock.c +@@@ -1042,32 -1030,8 +1045,37 @@@ static void delete_work_func(struct wor + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + bool verify_delete = test_and_clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags); + +++<<<<<<< HEAD + + if (test_and_clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags)) { + + /* + + * If we can evict the inode, give the remote node trying to + + * delete the inode some time before verifying that the delete + + * has happened. Otherwise, if we cause contention on the inode glock + + * immediately, the remote node will think that we still have + + * the inode in use, and so it will give up waiting. + + * + + * If we can't evict the inode, signal to the remote node that + + * the inode is still in use. We'll later try to delete the + + * inode locally in gfs2_evict_inode. + + * + + * FIXME: We only need to verify that the remote node has + + * deleted the inode because nodes before this remote delete + + * rework won't cooperate. At a later time, when we no longer + + * care about compatibility with such nodes, we can skip this + + * step entirely. + + */ + + if (gfs2_try_evict(gl)) { + + if (!test_bit(SDF_KILL, &sdp->sd_flags)) { + + gfs2_glock_hold(gl); + + if (!gfs2_queue_verify_delete(gl, true)) + + gfs2_glock_put(gl); + + } + + } + + } +++======= ++ if (test_and_clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags)) ++ gfs2_try_evict(gl); +++>>>>>>> a6033333ccce (gfs2: Update to the evict / remote delete documentation) + + if (verify_delete) { + u64 no_addr = gl->gl_name.ln_number; +* Unmerged path fs/gfs2/glock.c +diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c +index 245ffcb2e060..92253b406649 100644 +--- a/fs/gfs2/super.c ++++ b/fs/gfs2/super.c +@@ -1286,9 +1286,9 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode) + * exclusive access to the iopen glock here. + * + * Otherwise, the other nodes holding the lock will be notified about +- * our locking request. If they do not have the inode open, they are +- * expected to evict the cached inode and release the lock, allowing us +- * to proceed. ++ * our locking request (see iopen_go_callback()). If they do not have ++ * the inode open, they are expected to evict the cached inode and ++ * release the lock, allowing us to proceed. + * + * Otherwise, if they cannot evict the inode, they are expected to poke + * the inode glock (note: not the iopen glock). We will notice that diff --git a/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/a94dafe8.failed b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/a94dafe8.failed new file mode 100644 index 0000000000000..4feade7ee739f --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/a94dafe8.failed @@ -0,0 +1,49 @@ +gfs2: Return enum evict_behavior from gfs2_upgrade_iopen_glock + +jira LE-2290 +Rebuild_History Non-Buildable kernel-5.14.0-503.21.1.el9_5 +commit-author Andreas Gruenbacher +commit a94dafe87d5fdded799fc25b82b123fb93959421 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. 
Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/a94dafe8.failed + +In case an iopen glock cannot be upgraded, function +gfs2_upgrade_iopen_glock() needs to communicate to gfs2_evict_inode() +whether deleting the inode should be deferred or skipped altogether. +Change the function to return the appropriate enum evict_behavior value +to indicate that. + + Signed-off-by: Andreas Gruenbacher +(cherry picked from commit a94dafe87d5fdded799fc25b82b123fb93959421) + Signed-off-by: Jonathan Maple + +# Conflicts: +# fs/gfs2/super.c +diff --cc fs/gfs2/super.c +index 245ffcb2e060,46d325c2ab88..000000000000 +--- a/fs/gfs2/super.c ++++ b/fs/gfs2/super.c +@@@ -1375,12 -1364,15 +1380,19 @@@ static enum dinode_demise evict_should_ + should_delete: + if (gfs2_holder_initialized(&ip->i_iopen_gh) && + test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { +- if (!gfs2_upgrade_iopen_glock(inode)) { ++ enum evict_behavior behavior = ++ gfs2_upgrade_iopen_glock(inode); ++ ++ if (behavior != EVICT_SHOULD_DELETE) { + gfs2_holder_uninit(&ip->i_iopen_gh); +++<<<<<<< HEAD + + return SHOULD_NOT_DELETE_DINODE; +++======= ++ return behavior; +++>>>>>>> a94dafe87d5f (gfs2: Return enum evict_behavior from gfs2_upgrade_iopen_glock) + } + } + - return EVICT_SHOULD_DELETE; + + return SHOULD_DELETE_DINODE; + } + + /** +* Unmerged path fs/gfs2/super.c diff --git a/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/c5b7a240.failed b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/c5b7a240.failed new file mode 100644 index 0000000000000..63982402f50ff --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/c5b7a240.failed @@ -0,0 +1,57 @@ +gfs2: Only defer deletes when we have an iopen glock + +jira LE-2290 +Rebuild_History Non-Buildable kernel-5.14.0-503.21.1.el9_5 +commit-author Andreas Gruenbacher +commit c5b7a2400edc458b22133d5e5394bea26eab1923 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/c5b7a240.failed + +The mechanism to defer deleting unlinked inodes is tied to +delete_work_func(), which is tied to iopen glocks. When we don't have +an iopen glock, we must carry out deletes immediately instead. + +Fixes a NULL pointer dereference in gfs2_evict_inode(). 
+ +Fixes: 8c21c2c71e66 ("gfs2: Call gfs2_queue_verify_delete from gfs2_evict_inode") + Signed-off-by: Andreas Gruenbacher +(cherry picked from commit c5b7a2400edc458b22133d5e5394bea26eab1923) + Signed-off-by: Jonathan Maple + +# Conflicts: +# fs/gfs2/super.c +diff --cc fs/gfs2/super.c +index 245ffcb2e060,92a3b6ddafdc..000000000000 +--- a/fs/gfs2/super.c ++++ b/fs/gfs2/super.c +@@@ -1504,11 -1498,20 +1504,28 @@@ static void gfs2_evict_inode(struct ino + if (!sdp->sd_jdesc) + goto out; + +++<<<<<<< HEAD + + gfs2_holder_mark_uninitialized(&gh); + + ret = evict_should_delete(inode, &gh); + + if (ret == SHOULD_DEFER_EVICTION) + + goto out; + + if (ret == SHOULD_DELETE_DINODE) +++======= ++ behavior = evict_should_delete(inode, &gh); ++ if (behavior == EVICT_SHOULD_DEFER_DELETE && ++ !test_bit(SDF_KILL, &sdp->sd_flags)) { ++ struct gfs2_glock *io_gl = ip->i_iopen_gh.gh_gl; ++ ++ if (io_gl) { ++ gfs2_glock_hold(io_gl); ++ if (!gfs2_queue_verify_delete(io_gl, true)) ++ gfs2_glock_put(io_gl); ++ goto out; ++ } ++ behavior = EVICT_SHOULD_DELETE; ++ } ++ if (behavior == EVICT_SHOULD_DELETE) +++>>>>>>> c5b7a2400edc (gfs2: Only defer deletes when we have an iopen glock) + ret = evict_unlinked_inode(inode); + else + ret = evict_linked_inode(inode); +* Unmerged path fs/gfs2/super.c diff --git a/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/c79ba4be.failed b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/c79ba4be.failed new file mode 100644 index 0000000000000..1cfa896d400ac --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/c79ba4be.failed @@ -0,0 +1,54 @@ +gfs2: Rename dinode_demise to evict_behavior + +jira LE-2290 +Rebuild_History Non-Buildable kernel-5.14.0-503.21.1.el9_5 +commit-author Andreas Gruenbacher +commit c79ba4be351a06e0ac4c51143a83023bb37888d6 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/c79ba4be.failed + +Rename enum dinode_demise to evict_behavior and its items +SHOULD_DELETE_DINODE to EVICT_SHOULD_DELETE, +SHOULD_NOT_DELETE_DINODE to EVICT_SHOULD_SKIP_DELETE, and +SHOULD_DEFER_EVICTION to EVICT_SHOULD_DEFER_DELETE. + +In gfs2_evict_inode(), add a separate variable of type enum +evict_behavior instead of implicitly casting to int. + + Signed-off-by: Andreas Gruenbacher +(cherry picked from commit c79ba4be351a06e0ac4c51143a83023bb37888d6) + Signed-off-by: Jonathan Maple + +# Conflicts: +# fs/gfs2/super.c +diff --cc fs/gfs2/super.c +index 245ffcb2e060,f43b238ccaf1..000000000000 +--- a/fs/gfs2/super.c ++++ b/fs/gfs2/super.c +@@@ -1355,16 -1341,14 +1355,22 @@@ static enum evict_behavior evict_should + } + + if (gfs2_inode_already_deleted(ip->i_gl, ip->i_no_formal_ino)) +- return SHOULD_NOT_DELETE_DINODE; ++ return EVICT_SHOULD_SKIP_DELETE; + ret = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED); + if (ret) +- return SHOULD_NOT_DELETE_DINODE; ++ return EVICT_SHOULD_SKIP_DELETE; + +++<<<<<<< HEAD + + if (test_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags)) { + + ret = gfs2_instantiate(gh); + + if (ret) + + return SHOULD_NOT_DELETE_DINODE; + + } +++======= ++ ret = gfs2_instantiate(gh); ++ if (ret) ++ return EVICT_SHOULD_SKIP_DELETE; +++>>>>>>> c79ba4be351a (gfs2: Rename dinode_demise to evict_behavior) + + /* + * The inode may have been recreated in the meantime. 
+* Unmerged path fs/gfs2/super.c diff --git a/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/ce7356ae.failed b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/ce7356ae.failed new file mode 100644 index 0000000000000..eaf568486b651 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/ce7356ae.failed @@ -0,0 +1,55 @@ +mptcp: cope racing subflow creation in mptcp_rcv_space_adjust + +jira LE-2290 +cve CVE-2024-53122 +Rebuild_History Non-Buildable kernel-5.14.0-503.21.1.el9_5 +Rebuild_CHGLOG: - CVE-2024-53122 mptcp: cope racing subflow creation in mptcp_rcv_space_adjust (Patrick Talbert) [RHEL-70083 RHEL-69670] {CVE-2024-53122} +Rebuild_FUZZ: 89.05% +commit-author Paolo Abeni +commit ce7356ae35943cc6494cc692e62d51a734062b7d +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/ce7356ae.failed + +Additional active subflows - i.e. created by the in kernel path +manager - are included into the subflow list before starting the +3whs. + +A racing recvmsg() spooling data received on an already established +subflow would unconditionally call tcp_cleanup_rbuf() on all the +current subflows, potentially hitting a divide by zero error on +the newly created ones. + +Explicitly check that the subflow is in a suitable state before +invoking tcp_cleanup_rbuf(). + +Fixes: c76c6956566f ("mptcp: call tcp_cleanup_rbuf on subflows") + Signed-off-by: Paolo Abeni + Reviewed-by: Matthieu Baerts (NGI0) +Link: https://patch.msgid.link/02374660836e1b52afc91966b7535c8c5f7bafb0.1731060874.git.pabeni@redhat.com + Signed-off-by: Jakub Kicinski +(cherry picked from commit ce7356ae35943cc6494cc692e62d51a734062b7d) + Signed-off-by: Jonathan Maple + +# Conflicts: +# net/mptcp/protocol.c +diff --cc net/mptcp/protocol.c +index bc1ce6b89e7f,48d480982b78..000000000000 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@@ -2057,8 -2081,9 +2057,14 @@@ static void mptcp_rcv_space_adjust(stru + ssk = mptcp_subflow_tcp_sock(subflow); + slow = lock_sock_fast(ssk); + WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf); +++<<<<<<< HEAD + + tcp_sk(ssk)->window_clamp = window_clamp; + + tcp_cleanup_rbuf(ssk, 1); +++======= ++ WRITE_ONCE(tcp_sk(ssk)->window_clamp, window_clamp); ++ if (tcp_can_send_ack(ssk)) ++ tcp_cleanup_rbuf(ssk, 1); +++>>>>>>> ce7356ae3594 (mptcp: cope racing subflow creation in mptcp_rcv_space_adjust) + unlock_sock_fast(ssk, slow); + } + } +* Unmerged path net/mptcp/protocol.c diff --git a/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/d3c98285.failed b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/d3c98285.failed new file mode 100644 index 0000000000000..2a9cccc053226 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/d3c98285.failed @@ -0,0 +1,412 @@ +bnxt_en: Add function to calculate Toeplitz hash + +jira LE-2290 +Rebuild_History Non-Buildable kernel-5.14.0-503.21.1.el9_5 +commit-author Pavan Chebbi +commit d3c982851c15ff1c5187a6188710daa7d0db7fe4 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/d3c98285.failed + +For ntuple filters added by aRFS, the Toeplitz hash calculated by our +NIC is available and is used to store the ntuple filter for quick +retrieval. In the next patches, user defined ntuple filter support +will be added and we need to calculate the same hash for these +filters. 
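
[Editor's sketch — the driver's own bnxt_toeplitz() appears verbatim in the hunks below; this is a self-contained, hedged restatement of the same bit-serial formulation with a 64-bit sliding key prefix, useful for checking the computation outside the driver. The function name and standalone form are illustrative, not upstream code.]

    #include <stdint.h>
    #include <stddef.h>

    /* Canonical 32-bit Toeplitz hash of `data` under `key`. `prefix`
     * always holds the next 64 bits of the key stream; XORing the full
     * 64-bit prefix and keeping the upper 32 bits of the accumulator
     * yields the standard per-bit 32-bit key window XOR. Assumes the
     * key is long enough for the input (e.g. a 40-byte RSS key). */
    static uint32_t toeplitz_hash(const uint8_t *key, size_t keylen,
                                  const uint8_t *data, size_t len)
    {
            uint64_t prefix = 0, hash = 0;
            size_t i, j;

            for (i = 0; i < 8 && i < keylen; i++)
                    prefix = (prefix << 8) | key[i];

            for (i = 0, j = 8; i < len; i++, j++) {
                    uint8_t byte = data[i];
                    int bit;

                    for (bit = 0; bit < 8; bit++, prefix <<= 1, byte <<= 1)
                            if (byte & 0x80)
                                    hash ^= prefix;
                    prefix |= (j < keylen) ? key[j] : 0;
            }
            return (uint32_t)(hash >> 32);
    }

Feeding it the VNIC's RSS key and the packed 4-tuple (addresses then ports, as bnxt_get_rss_flow_tuple_len() sizes them) should reproduce the index the NIC reports, which is the whole point of the commit.
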
The same hash function needs to be used so we can detect +duplicates. + +Add the function bnxt_toeplitz() to calculate the Toeplitz hash for +user defined ntuple filters. bnxt_toeplitz() uses the same Toeplitz +key and the same key length as the NIC. + +bnxt_get_ntp_filter_idx() is added to return the hash index. For +aRFS, the hash comes from the NIC. For user defined ntuple, we call +bnxt_toeplitz() to calculate the hash index. + + Reviewed-by: Andy Gospodarek + Signed-off-by: Pavan Chebbi + Signed-off-by: Michael Chan + Signed-off-by: David S. Miller +(cherry picked from commit d3c982851c15ff1c5187a6188710daa7d0db7fe4) + Signed-off-by: Jonathan Maple + +# Conflicts: +# drivers/net/ethernet/broadcom/bnxt/bnxt.c +# drivers/net/ethernet/broadcom/bnxt/bnxt.h +diff --cc drivers/net/ethernet/broadcom/bnxt/bnxt.c +index e15f15706158,e9b382832a14..000000000000 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@@ -5252,6 -5338,278 +5261,281 @@@ static int bnxt_hwrm_cfa_l2_set_rx_mask + return hwrm_req_send_silent(bp, req); + } + +++<<<<<<< HEAD +++======= ++ void bnxt_del_l2_filter(struct bnxt *bp, struct bnxt_l2_filter *fltr) ++ { ++ if (!atomic_dec_and_test(&fltr->refcnt)) ++ return; ++ spin_lock_bh(&bp->ntp_fltr_lock); ++ hlist_del_rcu(&fltr->base.hash); ++ if (fltr->base.flags) { ++ clear_bit(fltr->base.sw_id, bp->ntp_fltr_bmap); ++ bp->ntp_fltr_count--; ++ } ++ spin_unlock_bh(&bp->ntp_fltr_lock); ++ kfree_rcu(fltr, base.rcu); ++ } ++ ++ static struct bnxt_l2_filter *__bnxt_lookup_l2_filter(struct bnxt *bp, ++ struct bnxt_l2_key *key, ++ u32 idx) ++ { ++ struct hlist_head *head = &bp->l2_fltr_hash_tbl[idx]; ++ struct bnxt_l2_filter *fltr; ++ ++ hlist_for_each_entry_rcu(fltr, head, base.hash) { ++ struct bnxt_l2_key *l2_key = &fltr->l2_key; ++ ++ if (ether_addr_equal(l2_key->dst_mac_addr, key->dst_mac_addr) && ++ l2_key->vlan == key->vlan) ++ return fltr; ++ } ++ return NULL; ++ } ++ ++ static struct bnxt_l2_filter *bnxt_lookup_l2_filter(struct bnxt *bp, ++ struct bnxt_l2_key *key, ++ u32 idx) ++ { ++ struct bnxt_l2_filter *fltr = NULL; ++ ++ rcu_read_lock(); ++ fltr = __bnxt_lookup_l2_filter(bp, key, idx); ++ if (fltr) ++ atomic_inc(&fltr->refcnt); ++ rcu_read_unlock(); ++ return fltr; ++ } ++ ++ #define BNXT_IPV4_4TUPLE(bp, fkeys) \ ++ (((fkeys)->basic.ip_proto == IPPROTO_TCP && \ ++ (bp)->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4) || \ ++ ((fkeys)->basic.ip_proto == IPPROTO_UDP && \ ++ (bp)->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4)) ++ ++ #define BNXT_IPV6_4TUPLE(bp, fkeys) \ ++ (((fkeys)->basic.ip_proto == IPPROTO_TCP && \ ++ (bp)->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6) || \ ++ ((fkeys)->basic.ip_proto == IPPROTO_UDP && \ ++ (bp)->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6)) ++ ++ static u32 bnxt_get_rss_flow_tuple_len(struct bnxt *bp, struct flow_keys *fkeys) ++ { ++ if (fkeys->basic.n_proto == htons(ETH_P_IP)) { ++ if (BNXT_IPV4_4TUPLE(bp, fkeys)) ++ return sizeof(fkeys->addrs.v4addrs) + ++ sizeof(fkeys->ports); ++ ++ if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4) ++ return sizeof(fkeys->addrs.v4addrs); ++ } ++ ++ if (fkeys->basic.n_proto == htons(ETH_P_IPV6)) { ++ if (BNXT_IPV6_4TUPLE(bp, fkeys)) ++ return sizeof(fkeys->addrs.v6addrs) + ++ sizeof(fkeys->ports); ++ ++ if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6) ++ return sizeof(fkeys->addrs.v6addrs); ++ } ++ ++ return 0; ++ } ++ ++ static u32 bnxt_toeplitz(struct bnxt *bp, struct flow_keys *fkeys, ++ const unsigned char *key) 
++ { ++ u64 prefix = bp->toeplitz_prefix, hash = 0; ++ struct bnxt_ipv4_tuple tuple4; ++ struct bnxt_ipv6_tuple tuple6; ++ int i, j, len = 0; ++ u8 *four_tuple; ++ ++ len = bnxt_get_rss_flow_tuple_len(bp, fkeys); ++ if (!len) ++ return 0; ++ ++ if (fkeys->basic.n_proto == htons(ETH_P_IP)) { ++ tuple4.v4addrs = fkeys->addrs.v4addrs; ++ tuple4.ports = fkeys->ports; ++ four_tuple = (unsigned char *)&tuple4; ++ } else { ++ tuple6.v6addrs = fkeys->addrs.v6addrs; ++ tuple6.ports = fkeys->ports; ++ four_tuple = (unsigned char *)&tuple6; ++ } ++ ++ for (i = 0, j = 8; i < len; i++, j++) { ++ u8 byte = four_tuple[i]; ++ int bit; ++ ++ for (bit = 0; bit < 8; bit++, prefix <<= 1, byte <<= 1) { ++ if (byte & 0x80) ++ hash ^= prefix; ++ } ++ prefix |= (j < HW_HASH_KEY_SIZE) ? key[j] : 0; ++ } ++ ++ /* The valid part of the hash is in the upper 32 bits. */ ++ return (hash >> 32) & BNXT_NTP_FLTR_HASH_MASK; ++ } ++ ++ #ifdef CONFIG_RFS_ACCEL ++ static struct bnxt_l2_filter * ++ bnxt_lookup_l2_filter_from_key(struct bnxt *bp, struct bnxt_l2_key *key) ++ { ++ struct bnxt_l2_filter *fltr; ++ u32 idx; ++ ++ idx = jhash2(&key->filter_key, BNXT_L2_KEY_SIZE, bp->hash_seed) & ++ BNXT_L2_FLTR_HASH_MASK; ++ fltr = bnxt_lookup_l2_filter(bp, key, idx); ++ return fltr; ++ } ++ #endif ++ ++ static int bnxt_init_l2_filter(struct bnxt *bp, struct bnxt_l2_filter *fltr, ++ struct bnxt_l2_key *key, u32 idx) ++ { ++ struct hlist_head *head; ++ ++ ether_addr_copy(fltr->l2_key.dst_mac_addr, key->dst_mac_addr); ++ fltr->l2_key.vlan = key->vlan; ++ fltr->base.type = BNXT_FLTR_TYPE_L2; ++ if (fltr->base.flags) { ++ int bit_id; ++ ++ bit_id = bitmap_find_free_region(bp->ntp_fltr_bmap, ++ BNXT_MAX_FLTR, 0); ++ if (bit_id < 0) ++ return -ENOMEM; ++ fltr->base.sw_id = (u16)bit_id; ++ } ++ head = &bp->l2_fltr_hash_tbl[idx]; ++ hlist_add_head_rcu(&fltr->base.hash, head); ++ atomic_set(&fltr->refcnt, 1); ++ return 0; ++ } ++ ++ static struct bnxt_l2_filter *bnxt_alloc_l2_filter(struct bnxt *bp, ++ struct bnxt_l2_key *key, ++ gfp_t gfp) ++ { ++ struct bnxt_l2_filter *fltr; ++ u32 idx; ++ int rc; ++ ++ idx = jhash2(&key->filter_key, BNXT_L2_KEY_SIZE, bp->hash_seed) & ++ BNXT_L2_FLTR_HASH_MASK; ++ fltr = bnxt_lookup_l2_filter(bp, key, idx); ++ if (fltr) ++ return fltr; ++ ++ fltr = kzalloc(sizeof(*fltr), gfp); ++ if (!fltr) ++ return ERR_PTR(-ENOMEM); ++ spin_lock_bh(&bp->ntp_fltr_lock); ++ rc = bnxt_init_l2_filter(bp, fltr, key, idx); ++ spin_unlock_bh(&bp->ntp_fltr_lock); ++ if (rc) { ++ bnxt_del_l2_filter(bp, fltr); ++ fltr = ERR_PTR(rc); ++ } ++ return fltr; ++ } ++ ++ static u16 bnxt_vf_target_id(struct bnxt_pf_info *pf, u16 vf_idx) ++ { ++ #ifdef CONFIG_BNXT_SRIOV ++ struct bnxt_vf_info *vf = &pf->vf[vf_idx]; ++ ++ return vf->fw_fid; ++ #else ++ return INVALID_HW_RING_ID; ++ #endif ++ } ++ ++ int bnxt_hwrm_l2_filter_free(struct bnxt *bp, struct bnxt_l2_filter *fltr) ++ { ++ struct hwrm_cfa_l2_filter_free_input *req; ++ u16 target_id = 0xffff; ++ int rc; ++ ++ if (fltr->base.flags & BNXT_ACT_FUNC_DST) { ++ struct bnxt_pf_info *pf = &bp->pf; ++ ++ if (fltr->base.vf_idx >= pf->active_vfs) ++ return -EINVAL; ++ ++ target_id = bnxt_vf_target_id(pf, fltr->base.vf_idx); ++ if (target_id == INVALID_HW_RING_ID) ++ return -EINVAL; ++ } ++ ++ rc = hwrm_req_init(bp, req, HWRM_CFA_L2_FILTER_FREE); ++ if (rc) ++ return rc; ++ ++ req->target_id = cpu_to_le16(target_id); ++ req->l2_filter_id = fltr->base.filter_id; ++ return hwrm_req_send(bp, req); ++ } ++ ++ int bnxt_hwrm_l2_filter_alloc(struct bnxt *bp, struct bnxt_l2_filter *fltr) ++ { ++ struct 
hwrm_cfa_l2_filter_alloc_output *resp; ++ struct hwrm_cfa_l2_filter_alloc_input *req; ++ u16 target_id = 0xffff; ++ int rc; ++ ++ if (fltr->base.flags & BNXT_ACT_FUNC_DST) { ++ struct bnxt_pf_info *pf = &bp->pf; ++ ++ if (fltr->base.vf_idx >= pf->active_vfs) ++ return -EINVAL; ++ ++ target_id = bnxt_vf_target_id(pf, fltr->base.vf_idx); ++ } ++ rc = hwrm_req_init(bp, req, HWRM_CFA_L2_FILTER_ALLOC); ++ if (rc) ++ return rc; ++ ++ req->target_id = cpu_to_le16(target_id); ++ req->flags = cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_RX); ++ ++ if (!BNXT_CHIP_TYPE_NITRO_A0(bp)) ++ req->flags |= ++ cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_FLAGS_OUTERMOST); ++ req->dst_id = cpu_to_le16(fltr->base.fw_vnic_id); ++ req->enables = ++ cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_ADDR | ++ CFA_L2_FILTER_ALLOC_REQ_ENABLES_DST_ID | ++ CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_ADDR_MASK); ++ ether_addr_copy(req->l2_addr, fltr->l2_key.dst_mac_addr); ++ eth_broadcast_addr(req->l2_addr_mask); ++ ++ if (fltr->l2_key.vlan) { ++ req->enables |= ++ cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_IVLAN | ++ CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_IVLAN_MASK | ++ CFA_L2_FILTER_ALLOC_REQ_ENABLES_NUM_VLANS); ++ req->num_vlans = 1; ++ req->l2_ivlan = cpu_to_le16(fltr->l2_key.vlan); ++ req->l2_ivlan_mask = cpu_to_le16(0xfff); ++ } ++ ++ resp = hwrm_req_hold(bp, req); ++ rc = hwrm_req_send(bp, req); ++ if (!rc) { ++ fltr->base.filter_id = resp->l2_filter_id; ++ set_bit(BNXT_FLTR_VALID, &fltr->base.state); ++ } ++ hwrm_req_drop(bp, req); ++ return rc; ++ } ++ +++>>>>>>> d3c982851c15 (bnxt_en: Add function to calculate Toeplitz hash) + #ifdef CONFIG_RFS_ACCEL + static int bnxt_hwrm_cfa_ntuple_filter_free(struct bnxt *bp, + struct bnxt_ntuple_filter *fltr) +@@@ -13557,15 -13958,14 +13853,15 @@@ static int bnxt_rx_flow_steer(struct ne + goto err_free; + } + + - new_fltr->l2_fltr = l2_fltr; + + memcpy(new_fltr->dst_mac_addr, eth->h_dest, ETH_ALEN); + + memcpy(new_fltr->src_mac_addr, eth->h_source, ETH_ALEN); + +- idx = skb_get_hash_raw(skb) & BNXT_NTP_FLTR_HASH_MASK; ++ idx = bnxt_get_ntp_filter_idx(bp, fkeys, skb); + head = &bp->ntp_fltr_hash_tbl[idx]; + rcu_read_lock(); + - hlist_for_each_entry_rcu(fltr, head, base.hash) { + + hlist_for_each_entry_rcu(fltr, head, hash) { + if (bnxt_fltr_match(fltr, new_fltr)) { + - rc = fltr->base.sw_id; + + rc = fltr->sw_id; + rcu_read_unlock(); + goto err_free; + } +diff --cc drivers/net/ethernet/broadcom/bnxt/bnxt.h +index 936229d39a0f,3f4e4708f7d8..000000000000 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h +@@@ -1345,6 -1349,43 +1345,46 @@@ struct bnxt_ntuple_filter + unsigned long state; + #define BNXT_FLTR_VALID 0 + #define BNXT_FLTR_UPDATE 1 +++<<<<<<< HEAD +++======= ++ ++ struct rcu_head rcu; ++ }; ++ ++ struct bnxt_ntuple_filter { ++ struct bnxt_filter_base base; ++ struct flow_keys fkeys; ++ struct bnxt_l2_filter *l2_fltr; ++ u32 flow_id; ++ }; ++ ++ struct bnxt_l2_key { ++ union { ++ struct { ++ u8 dst_mac_addr[ETH_ALEN]; ++ u16 vlan; ++ }; ++ u32 filter_key; ++ }; ++ }; ++ ++ struct bnxt_ipv4_tuple { ++ struct flow_dissector_key_ipv4_addrs v4addrs; ++ struct flow_dissector_key_ports ports; ++ }; ++ ++ struct bnxt_ipv6_tuple { ++ struct flow_dissector_key_ipv6_addrs v6addrs; ++ struct flow_dissector_key_ports ports; ++ }; ++ ++ #define BNXT_L2_KEY_SIZE (sizeof(struct bnxt_l2_key) / 4) ++ ++ struct bnxt_l2_filter { ++ struct bnxt_filter_base base; ++ struct bnxt_l2_key l2_key; ++ atomic_t refcnt; +++>>>>>>> d3c982851c15 (bnxt_en: Add 
function to calculate Toeplitz hash) + }; + + struct bnxt_link_info { +@@@ -2371,6 -2417,14 +2411,17 @@@ struct bnxt + unsigned long *ntp_fltr_bmap; + int ntp_fltr_count; + +++<<<<<<< HEAD +++======= ++ #define BNXT_L2_FLTR_MAX_FLTR 1024 ++ #define BNXT_L2_FLTR_HASH_SIZE 32 ++ #define BNXT_L2_FLTR_HASH_MASK (BNXT_L2_FLTR_HASH_SIZE - 1) ++ struct hlist_head l2_fltr_hash_tbl[BNXT_L2_FLTR_HASH_SIZE]; ++ ++ u32 hash_seed; ++ u64 toeplitz_prefix; ++ +++>>>>>>> d3c982851c15 (bnxt_en: Add function to calculate Toeplitz hash) + /* To protect link related settings during link changes and + * ethtool settings changes. + */ +* Unmerged path drivers/net/ethernet/broadcom/bnxt/bnxt.c +* Unmerged path drivers/net/ethernet/broadcom/bnxt/bnxt.h diff --git a/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/rebuild.details.txt b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/rebuild.details.txt new file mode 100644 index 0000000000000..ca70b5ea6b9f8 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-503.21.1.el9_5/rebuild.details.txt @@ -0,0 +1,28 @@ +Rebuild_History BUILDABLE +Rebuilding Kernel from rpm changelog with Fuzz Limit: 87.50% +Number of commits in upstream range v5.14~1..master: 278851 +Number of commits in rpm: 45 +Number of commits matched with upstream: 43 (95.56%) +Number of commits in upstream but not in rpm: 278808 +Number of commits NOT found in upstream: 2 (4.44%) + +Rebuilding Kernel on Branch rocky9_5_rebuild_kernel-5.14.0-503.21.1.el9_5 for kernel-5.14.0-503.21.1.el9_5 +Clean Cherry Picks: 33 (76.74%) +Empty Cherry Picks: 10 (23.26%) +_______________________________ + +__EMPTY COMMITS__________________________ +d3c982851c15ff1c5187a6188710daa7d0db7fe4 bnxt_en: Add function to calculate Toeplitz hash +5de1fce3369564ca6b9eed339838c51ec6290270 bnxt_en: Add support for user configured RSS key +7c6f714d88475ceae5342264858a641eafa19632 gfs2: Fix unlinked inode cleanup +c79ba4be351a06e0ac4c51143a83023bb37888d6 gfs2: Rename dinode_demise to evict_behavior +a94dafe87d5fdded799fc25b82b123fb93959421 gfs2: Return enum evict_behavior from gfs2_upgrade_iopen_glock +8c21c2c71e668a5eed9fe9981a2306f9178e6c3e gfs2: Call gfs2_queue_verify_delete from gfs2_evict_inode +a6033333ccce01ecada39b3ddabc03fd967e60c0 gfs2: Update to the evict / remote delete documentation +c5b7a2400edc458b22133d5e5394bea26eab1923 gfs2: Only defer deletes when we have an iopen glock +527ed4f7d902d362471a93e1a4afb604c18ceb48 mm: remove arguments of show_mem() +ce7356ae35943cc6494cc692e62d51a734062b7d mptcp: cope racing subflow creation in mptcp_rcv_space_adjust + +__CHANGES NOT IN UPSTREAM________________ +Porting to Rocky Linux 9, debranding and Rocky branding' +Ensure aarch64 kernel is not compressed' diff --git a/ciq/ciq_backports/kernel-5.14.0-503.22.1.el9_5/7052622f.failed b/ciq/ciq_backports/kernel-5.14.0-503.22.1.el9_5/7052622f.failed new file mode 100644 index 0000000000000..caf5851a93709 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-503.22.1.el9_5/7052622f.failed @@ -0,0 +1,61 @@ +netfilter: nft_socket: Fix a NULL vs IS_ERR() bug in nft_socket_cgroup_subtree_level() + +jira LE-2290 +Rebuild_History Non-Buildable kernel-5.14.0-503.22.1.el9_5 +commit-author Dan Carpenter +commit 7052622fccb1efb850c6b55de477f65d03525a30 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-503.22.1.el9_5/7052622f.failed + +The cgroup_get_from_path() function never returns NULL, it returns error +pointers. 
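
[Editor's sketch — this is the classic NULL-vs-ERR_PTR mix-up; a generic illustration of the bug class, not the exact nft_socket code, which follows in the hunk below.]

    /* cgroup_get_from_path() returns ERR_PTR() on failure, never NULL,
     * so a NULL check is never taken and the error pointer leaks
     * through to later dereferences. */
    struct cgroup *cgrp = cgroup_get_from_path("/");

    if (!cgrp)              /* WRONG: cannot trigger */
            return -ENOENT;

    if (IS_ERR(cgrp))       /* RIGHT: propagate the encoded errno */
            return PTR_ERR(cgrp);
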
Update the error handling to match. + +Fixes: 7f3287db6543 ("netfilter: nft_socket: make cgroupsv2 matching work with namespaces") + Signed-off-by: Dan Carpenter + Acked-by: Florian Westphal + Acked-by: Pablo Neira Ayuso +Link: https://patch.msgid.link/bbc0c4e0-05cc-4f44-8797-2f4b3920a820@stanley.mountain + Signed-off-by: Jakub Kicinski +(cherry picked from commit 7052622fccb1efb850c6b55de477f65d03525a30) + Signed-off-by: Jonathan Maple + +# Conflicts: +# net/netfilter/nft_socket.c +diff --cc net/netfilter/nft_socket.c +index 7605b96d8ebc,0a8883a93e83..000000000000 +--- a/net/netfilter/nft_socket.c ++++ b/net/netfilter/nft_socket.c +@@@ -52,6 -54,28 +52,31 @@@ nft_sock_get_eval_cgroupv2(u32 *dest, s + memcpy(dest, &cgid, sizeof(u64)); + return true; + } +++<<<<<<< HEAD +++======= ++ ++ /* process context only, uses current->nsproxy. */ ++ static noinline int nft_socket_cgroup_subtree_level(void) ++ { ++ struct cgroup *cgrp = cgroup_get_from_path("/"); ++ int level; ++ ++ if (IS_ERR(cgrp)) ++ return PTR_ERR(cgrp); ++ ++ level = cgrp->level; ++ ++ cgroup_put(cgrp); ++ ++ if (WARN_ON_ONCE(level > 255)) ++ return -ERANGE; ++ ++ if (WARN_ON_ONCE(level < 0)) ++ return -EINVAL; ++ ++ return level; ++ } +++>>>>>>> 7052622fccb1 (netfilter: nft_socket: Fix a NULL vs IS_ERR() bug in nft_socket_cgroup_subtree_level()) + #endif + + static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt) +* Unmerged path net/netfilter/nft_socket.c diff --git a/ciq/ciq_backports/kernel-5.14.0-503.22.1.el9_5/7f3287db.failed b/ciq/ciq_backports/kernel-5.14.0-503.22.1.el9_5/7f3287db.failed new file mode 100644 index 0000000000000..2565246841b8b --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-503.22.1.el9_5/7f3287db.failed @@ -0,0 +1,96 @@ +netfilter: nft_socket: make cgroupsv2 matching work with namespaces + +jira LE-2290 +Rebuild_History Non-Buildable kernel-5.14.0-503.22.1.el9_5 +commit-author Florian Westphal +commit 7f3287db654395f9c5ddd246325ff7889f550286 +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-503.22.1.el9_5/7f3287db.failed + +When running in container environmment, /sys/fs/cgroup/ might not be +the real root node of the sk-attached cgroup. + +Example: + +In container: +% stat /sys//fs/cgroup/ +Device: 0,21 Inode: 2214 .. +% stat /sys/fs/cgroup/foo +Device: 0,21 Inode: 2264 .. + +The expectation would be for: + + nft add rule .. socket cgroupv2 level 1 "foo" counter + +to match traffic from a process that got added to "foo" via +"echo $pid > /sys/fs/cgroup/foo/cgroup.procs". + +However, 'level 3' is needed to make this work. + +Seen from initial namespace, the complete hierarchy is: + +% stat /sys/fs/cgroup/system.slice/docker-.../foo + Device: 0,21 Inode: 2264 .. + +i.e. hierarchy is +0 1 2 3 +/ -> system.slice -> docker-1... -> foo + +... but the container doesn't know that its "/" is the "docker-1.." +cgroup. Current code will retrieve the 'system.slice' cgroup node +and store its kn->id in the destination register, so compare with +2264 ("foo" cgroup id) will not match. + +Fetch "/" cgroup from ->init() and add its level to the level we try to +extract. cgroup root-level is 0 for the init-namespace or the level +of the ancestor that is exposed as the cgroup root inside the container. + +In the above case, cgrp->level of "/" resolved in the container is 2 +(docker-1...scope/) and request for 'level 1' will get adjusted +to fetch the actual level (3). 
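
[Editor's sketch — in other words, the match computes an absolute ancestor level by offsetting the user-supplied level with the level of the container-visible root. The subtree-level helper is shown verbatim in the 7052622f hunk above; where exactly the offset is applied is an assumption here (the commit message says it happens from ->init()), and `priv->level` / `priv->level_user` are the fields added in the hunk below.]

    /* Hedged sketch of the adjustment described above, at expr init
     * time (process context, so current->nsproxy is valid): */
    int root_level = nft_socket_cgroup_subtree_level(); /* 0 in init ns; 2 in the example */

    if (root_level < 0)
            return root_level;
    priv->level = priv->level_user + root_level;        /* 'level 1' resolves to 3 here */
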
+ +v2: use CONFIG_SOCK_CGROUP_DATA, eval function depends on it. + (kernel test robot) + + Cc: cgroups@vger.kernel.org +Fixes: e0bb96db96f8 ("netfilter: nft_socket: add support for cgroupsv2") + Reported-by: Nadia Pinaeva + Signed-off-by: Florian Westphal + Signed-off-by: Pablo Neira Ayuso +(cherry picked from commit 7f3287db654395f9c5ddd246325ff7889f550286) + Signed-off-by: Jonathan Maple + +# Conflicts: +# net/netfilter/nft_socket.c +diff --cc net/netfilter/nft_socket.c +index 7605b96d8ebc,12cdff640492..000000000000 +--- a/net/netfilter/nft_socket.c ++++ b/net/netfilter/nft_socket.c +@@@ -9,7 -9,9 +9,13 @@@ + + struct nft_socket { + enum nft_socket_keys key:8; +++<<<<<<< HEAD + + u8 level; +++======= ++ u8 level; /* cgroupv2 level to extract */ ++ u8 level_user; /* cgroupv2 level provided by userspace */ ++ u8 len; +++>>>>>>> 7f3287db6543 (netfilter: nft_socket: make cgroupsv2 matching work with namespaces) + union { + u8 dreg; + }; +@@@ -207,7 -244,7 +247,11 @@@ static int nft_socket_dump(struct sk_bu + if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg)) + return -1; + if (priv->key == NFT_SOCKET_CGROUPV2 && +++<<<<<<< HEAD + + nla_put_u32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level))) +++======= ++ nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level_user))) +++>>>>>>> 7f3287db6543 (netfilter: nft_socket: make cgroupsv2 matching work with namespaces) + return -1; + return 0; + } +* Unmerged path net/netfilter/nft_socket.c diff --git a/ciq/ciq_backports/kernel-5.14.0-503.22.1.el9_5/c1aa3886.failed b/ciq/ciq_backports/kernel-5.14.0-503.22.1.el9_5/c1aa3886.failed new file mode 100644 index 0000000000000..5287c65081570 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-503.22.1.el9_5/c1aa3886.failed @@ -0,0 +1,133 @@ +netfilter: nf_tables: store new sets in dedicated list + +jira LE-2290 +Rebuild_History Non-Buildable kernel-5.14.0-503.22.1.el9_5 +commit-author Florian Westphal +commit c1aa38866b9c58dc6cf7a5fc6a3e1ca75565169e +Empty-Commit: Cherry-Pick Conflicts during history rebuild. +Will be included in final tarball splat. Ref for failed cherry-pick at: +ciq/ciq_backports/kernel-5.14.0-503.22.1.el9_5/c1aa3886.failed + +nft_set_lookup_byid() is very slow when transaction becomes large, due to +walk of the transaction list. + +Add a dedicated list that contains only the new sets. + +Before: nft -f ruleset 0.07s user 0.00s system 0% cpu 1:04.84 total +After: nft -f ruleset 0.07s user 0.00s system 0% cpu 30.115 total + +.. where ruleset contains ~10 sets with ~100k elements. +The above number is for a combined flush+reload of the ruleset. + +With previous flush, even the first NEWELEM has to walk through a few +hundred thousands of DELSET(ELEM) transactions before the first NEWSET +object. To cope with random-order-newset-newsetelem we'd need to replace +commit_set_list with a hashtable. + +Expectation is that a NEWELEM operation refers to the most recently added +set, so last entry of the dedicated list should be the set we want. + +NB: This is not a bug fix per se (functionality is fine), but with +larger transaction batches list search takes forever, so it would be +nice to speed this up for -stable too, hence adding a "fixes" tag. 
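
[Editor's sketch — the speedup rests on a small data-structure change; this illustrates the lookup the dedicated list enables, walking only new sets (newest first) instead of the whole transaction log. The fields come from the conflict hunk below; the function name is illustrative, and upstream additionally checks table and generation mask.]

    /* Hedged sketch, not the upstream function: resolve a NEWSET by
     * its transaction id via the dedicated per-net list. Reverse walk,
     * since a NEWELEM usually refers to the most recently added set. */
    static struct nft_set *lookup_new_set(struct nftables_pernet *nft_net, u32 id)
    {
            struct nft_trans_set *trans_set;

            list_for_each_entry_reverse(trans_set, &nft_net->commit_set_list,
                                        list_trans_newset) {
                    if (trans_set->set_id == id)
                            return trans_set->set;
            }
            return NULL;
    }
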
+ +Fixes: 958bee14d071 ("netfilter: nf_tables: use new transaction infrastructure to handle sets") + Reported-by: Nadia Pinaeva + Signed-off-by: Florian Westphal + Signed-off-by: Pablo Neira Ayuso +(cherry picked from commit c1aa38866b9c58dc6cf7a5fc6a3e1ca75565169e) + Signed-off-by: Jonathan Maple + +# Conflicts: +# include/net/netfilter/nf_tables.h +# net/netfilter/nf_tables_api.c +diff --cc include/net/netfilter/nf_tables.h +index f3d24766182f,2be4738eae1c..000000000000 +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@@ -1552,16 -1659,22 +1552,21 @@@ struct nft_trans_rule + bool bound; + }; + + -#define nft_trans_container_rule(trans) \ + - container_of(trans, struct nft_trans_rule, nft_trans) + -#define nft_trans_rule(trans) \ + - nft_trans_container_rule(trans)->rule + -#define nft_trans_flow_rule(trans) \ + - nft_trans_container_rule(trans)->flow + -#define nft_trans_rule_id(trans) \ + - nft_trans_container_rule(trans)->rule_id + -#define nft_trans_rule_bound(trans) \ + - nft_trans_container_rule(trans)->bound + -#define nft_trans_rule_chain(trans) \ + - nft_trans_container_rule(trans)->chain + +#define nft_trans_rule(trans) \ + + (((struct nft_trans_rule *)trans->data)->rule) + +#define nft_trans_flow_rule(trans) \ + + (((struct nft_trans_rule *)trans->data)->flow) + +#define nft_trans_rule_id(trans) \ + + (((struct nft_trans_rule *)trans->data)->rule_id) + +#define nft_trans_rule_bound(trans) \ + + (((struct nft_trans_rule *)trans->data)->bound) + + struct nft_trans_set { +++<<<<<<< HEAD +++======= ++ struct nft_trans_binding nft_trans_binding; ++ struct list_head list_trans_newset; +++>>>>>>> c1aa38866b9c (netfilter: nf_tables: store new sets in dedicated list) + struct nft_set *set; + u32 set_id; + u32 gc_int; +diff --cc net/netfilter/nf_tables_api.c +index c82ad7ab90a7,3ea5d0163510..000000000000 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@@ -374,12 -392,24 +374,31 @@@ static void nf_tables_unregister_hook(s + static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) + { + struct nftables_pernet *nft_net = nft_pernet(net); +++<<<<<<< HEAD +++======= ++ struct nft_trans_binding *binding; ++ struct nft_trans_set *trans_set; ++ ++ list_add_tail(&trans->list, &nft_net->commit_list); ++ ++ binding = nft_trans_get_binding(trans); ++ if (!binding) ++ return; +++>>>>>>> c1aa38866b9c (netfilter: nf_tables: store new sets in dedicated list) + + switch (trans->msg_type) { + case NFT_MSG_NEWSET: ++ trans_set = nft_trans_container_set(trans); ++ + if (!nft_trans_set_update(trans) && + nft_set_is_anonymous(nft_trans_set(trans))) +++<<<<<<< HEAD + + list_add_tail(&trans->binding_list, &nft_net->binding_list); +++======= ++ list_add_tail(&binding->binding_list, &nft_net->binding_list); ++ ++ list_add_tail(&trans_set->list_trans_newset, &nft_net->commit_set_list); +++>>>>>>> c1aa38866b9c (netfilter: nf_tables: store new sets in dedicated list) + break; + case NFT_MSG_NEWCHAIN: + if (!nft_trans_chain_update(trans) && +@@@ -568,6 -614,10 +587,13 @@@ static int __nft_trans_set_add(const st + if (trans == NULL) + return -ENOMEM; + +++<<<<<<< HEAD +++======= ++ trans_set = nft_trans_container_set(trans); ++ INIT_LIST_HEAD(&trans_set->nft_trans_binding.binding_list); ++ INIT_LIST_HEAD(&trans_set->list_trans_newset); ++ +++>>>>>>> c1aa38866b9c (netfilter: nf_tables: store new sets in dedicated list) + if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) { + nft_trans_set_id(trans) = + 
ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); +* Unmerged path include/net/netfilter/nf_tables.h +* Unmerged path net/netfilter/nf_tables_api.c diff --git a/ciq/ciq_backports/kernel-5.14.0-503.22.1.el9_5/rebuild.details.txt b/ciq/ciq_backports/kernel-5.14.0-503.22.1.el9_5/rebuild.details.txt new file mode 100644 index 0000000000000..a9f15b5d8e2b7 --- /dev/null +++ b/ciq/ciq_backports/kernel-5.14.0-503.22.1.el9_5/rebuild.details.txt @@ -0,0 +1,22 @@ +Rebuild_History BUILDABLE +Rebuilding Kernel from rpm changelog with Fuzz Limit: 87.50% +Number of commits in upstream range v5.14~1..master: 278851 +Number of commits in rpm: 47 +Number of commits matched with upstream: 44 (93.62%) +Number of commits in upstream but not in rpm: 278807 +Number of commits NOT found in upstream: 3 (6.38%) + +Rebuilding Kernel on Branch rocky9_5_rebuild_kernel-5.14.0-503.22.1.el9_5 for kernel-5.14.0-503.22.1.el9_5 +Clean Cherry Picks: 41 (93.18%) +Empty Cherry Picks: 3 (6.82%) +_______________________________ + +__EMPTY COMMITS__________________________ +7f3287db654395f9c5ddd246325ff7889f550286 netfilter: nft_socket: make cgroupsv2 matching work with namespaces +7052622fccb1efb850c6b55de477f65d03525a30 netfilter: nft_socket: Fix a NULL vs IS_ERR() bug in nft_socket_cgroup_subtree_level() +c1aa38866b9c58dc6cf7a5fc6a3e1ca75565169e netfilter: nf_tables: store new sets in dedicated list + +__CHANGES NOT IN UPSTREAM________________ +Porting to Rocky Linux 9, debranding and Rocky branding' +Ensure aarch64 kernel is not compressed' +rh_messages.h: un-unmaintain hfi1 diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index dfc943fab87b4..5fba07aac4cda 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -255,22 +255,9 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, dst_virt_ptr = (dma_addr_t **)hwq->pbl[PBL_LVL_0].pg_arr; src_phys_ptr = hwq->pbl[PBL_LVL_1].pg_map_arr; - if (hwq_attr->type == HWQ_TYPE_MR) { - /* For MR it is expected that we supply only 1 contigous - * page i.e only 1 entry in the PDL that will contain - * all the PBLs for the user supplied memory region - */ - for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; - i++) - dst_virt_ptr[0][i] = src_phys_ptr[i] | - flag; - } else { - for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; - i++) - dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] = - src_phys_ptr[i] | - PTU_PDE_VALID; - } + for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++) + dst_virt_ptr[0][i] = src_phys_ptr[i] | flag; + /* Alloc or init PTEs */ rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_2], hwq_attr->sginfo); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e15f15706158e..6f845eb3cabb1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4177,13 +4177,36 @@ static void bnxt_init_vnics(struct bnxt *bp) vnic->fw_l2_ctx_id = INVALID_HW_RING_ID; if (bp->vnic_info[i].rss_hash_key) { - if (i == 0) - get_random_bytes(vnic->rss_hash_key, - HW_HASH_KEY_SIZE); - else + if (!i) { + u8 *key = (void *)vnic->rss_hash_key; + int k; + + if (!bp->rss_hash_key_valid && + !bp->rss_hash_key_updated) { + get_random_bytes(bp->rss_hash_key, + HW_HASH_KEY_SIZE); + bp->rss_hash_key_updated = true; + } + + memcpy(vnic->rss_hash_key, bp->rss_hash_key, + HW_HASH_KEY_SIZE); + + if (!bp->rss_hash_key_updated) + continue; + + bp->rss_hash_key_updated = false; + bp->rss_hash_key_valid = true; + + bp->toeplitz_prefix = 0; + for (k = 0; k 
< 8; k++) { + bp->toeplitz_prefix <<= 8; + bp->toeplitz_prefix |= key[k]; + } + } else { memcpy(vnic->rss_hash_key, bp->vnic_info[0].rss_hash_key, HW_HASH_KEY_SIZE); + } } } } @@ -5252,6 +5275,79 @@ static int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp, u16 vnic_id) return hwrm_req_send_silent(bp, req); } +#define BNXT_IPV4_4TUPLE(bp, fkeys) \ + (((fkeys)->basic.ip_proto == IPPROTO_TCP && \ + (bp)->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4) || \ + ((fkeys)->basic.ip_proto == IPPROTO_UDP && \ + (bp)->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4)) + +#define BNXT_IPV6_4TUPLE(bp, fkeys) \ + (((fkeys)->basic.ip_proto == IPPROTO_TCP && \ + (bp)->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6) || \ + ((fkeys)->basic.ip_proto == IPPROTO_UDP && \ + (bp)->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6)) + +static u32 bnxt_get_rss_flow_tuple_len(struct bnxt *bp, struct flow_keys *fkeys) +{ + if (fkeys->basic.n_proto == htons(ETH_P_IP)) { + if (BNXT_IPV4_4TUPLE(bp, fkeys)) + return sizeof(fkeys->addrs.v4addrs) + + sizeof(fkeys->ports); + + if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4) + return sizeof(fkeys->addrs.v4addrs); + } + + if (fkeys->basic.n_proto == htons(ETH_P_IPV6)) { + if (BNXT_IPV6_4TUPLE(bp, fkeys)) + return sizeof(fkeys->addrs.v6addrs) + + sizeof(fkeys->ports); + + if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6) + return sizeof(fkeys->addrs.v6addrs); + } + + return 0; +} + +static u32 bnxt_toeplitz(struct bnxt *bp, struct flow_keys *fkeys, + const unsigned char *key) +{ + u64 prefix = bp->toeplitz_prefix, hash = 0; + struct bnxt_ipv4_tuple tuple4; + struct bnxt_ipv6_tuple tuple6; + int i, j, len = 0; + u8 *four_tuple; + + len = bnxt_get_rss_flow_tuple_len(bp, fkeys); + if (!len) + return 0; + + if (fkeys->basic.n_proto == htons(ETH_P_IP)) { + tuple4.v4addrs = fkeys->addrs.v4addrs; + tuple4.ports = fkeys->ports; + four_tuple = (unsigned char *)&tuple4; + } else { + tuple6.v6addrs = fkeys->addrs.v6addrs; + tuple6.ports = fkeys->ports; + four_tuple = (unsigned char *)&tuple6; + } + + for (i = 0, j = 8; i < len; i++, j++) { + u8 byte = four_tuple[i]; + int bit; + + for (bit = 0; bit < 8; bit++, prefix <<= 1, byte <<= 1) { + if (byte & 0x80) + hash ^= prefix; + } + prefix |= (j < HW_HASH_KEY_SIZE) ? key[j] : 0; + } + + /* The valid part of the hash is in the upper 32 bits. 
*/ + return (hash >> 32) & BNXT_NTP_FLTR_HASH_MASK; +} + #ifdef CONFIG_RFS_ACCEL static int bnxt_hwrm_cfa_ntuple_filter_free(struct bnxt *bp, struct bnxt_ntuple_filter *fltr) @@ -13469,6 +13565,18 @@ static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type, } } +static u32 bnxt_get_ntp_filter_idx(struct bnxt *bp, struct flow_keys *fkeys, + const struct sk_buff *skb) +{ + struct bnxt_vnic_info *vnic; + + if (skb) + return skb_get_hash_raw(skb) & BNXT_NTP_FLTR_HASH_MASK; + + vnic = &bp->vnic_info[0]; + return bnxt_toeplitz(bp, fkeys, (void *)vnic->rss_hash_key); +} + #ifdef CONFIG_RFS_ACCEL static bool bnxt_fltr_match(struct bnxt_ntuple_filter *f1, struct bnxt_ntuple_filter *f2) @@ -13560,7 +13668,7 @@ static int bnxt_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, memcpy(new_fltr->dst_mac_addr, eth->h_dest, ETH_ALEN); memcpy(new_fltr->src_mac_addr, eth->h_source, ETH_ALEN); - idx = skb_get_hash_raw(skb) & BNXT_NTP_FLTR_HASH_MASK; + idx = bnxt_get_ntp_filter_idx(bp, fkeys, skb); head = &bp->ntp_fltr_hash_tbl[idx]; rcu_read_lock(); hlist_for_each_entry_rcu(fltr, head, hash) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 936229d39a0fd..9d53fb1342cc1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -38,6 +38,16 @@ extern struct list_head bnxt_block_cb_list; struct page_pool; +struct bnxt_ipv4_tuple { + struct flow_dissector_key_ipv4_addrs v4addrs; + struct flow_dissector_key_ports ports; +}; + +struct bnxt_ipv6_tuple { + struct flow_dissector_key_ipv6_addrs v6addrs; + struct flow_dissector_key_ports ports; +}; + struct tx_bd { __le32 tx_bd_len_flags_type; #define TX_BD_TYPE (0x3f << 0) @@ -2158,12 +2168,17 @@ struct bnxt { u16 rss_indir_tbl_entries; u32 rss_hash_cfg; u32 rss_hash_delta; + u64 toeplitz_prefix; u32 rss_cap; #define BNXT_RSS_CAP_RSS_HASH_TYPE_DELTA BIT(0) #define BNXT_RSS_CAP_UDP_RSS_CAP BIT(1) #define BNXT_RSS_CAP_NEW_RSS_CAP BIT(2) #define BNXT_RSS_CAP_RSS_TCAM BIT(3) + u8 rss_hash_key[HW_HASH_KEY_SIZE]; + u8 rss_hash_key_valid:1; + u8 rss_hash_key_updated:1; + u16 max_mtu; u16 tso_max_segs; u8 max_tc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 514f8ae1ae6fd..5c8e044cfc3bc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1361,8 +1361,10 @@ static int bnxt_set_rxfh(struct net_device *dev, if (rxfh->hfunc && rxfh->hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; - if (rxfh->key) - return -EOPNOTSUPP; + if (rxfh->key) { + memcpy(bp->rss_hash_key, rxfh->key, HW_HASH_KEY_SIZE); + bp->rss_hash_key_updated = true; + } if (rxfh->indir) { u32 i, pad, tbl_size = bnxt_get_rxfh_indir_size(dev); diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index bca2084cc54b4..6b0149d70e2da 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -754,6 +754,7 @@ enum i40e_filter_state { I40E_FILTER_ACTIVE, /* Added to switch by FW */ I40E_FILTER_FAILED, /* Rejected by FW */ I40E_FILTER_REMOVE, /* To be removed */ + I40E_FILTER_NEW_SYNC, /* New, not sent yet, is in i40e_sync_vsi_filters() */ /* There is no 'removed' state; the filter struct is freed */ }; struct i40e_mac_filter { diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index abf624d770e67..208c2f0857b61 
100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -89,6 +89,7 @@ static char *i40e_filter_state_string[] = { "ACTIVE", "FAILED", "REMOVE", + "NEW_SYNC", }; /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index c73e90543fc01..c89865550501a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1255,6 +1255,7 @@ int i40e_count_filters(struct i40e_vsi *vsi) hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { if (f->state == I40E_FILTER_NEW || + f->state == I40E_FILTER_NEW_SYNC || f->state == I40E_FILTER_ACTIVE) ++cnt; } @@ -1441,6 +1442,8 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, new->f = add_head; new->state = add_head->state; + if (add_head->state == I40E_FILTER_NEW) + add_head->state = I40E_FILTER_NEW_SYNC; /* Add the new filter to the tmp list */ hlist_add_head(&new->hlist, tmp_add_list); @@ -1550,6 +1553,8 @@ static int i40e_correct_vf_mac_vlan_filters(struct i40e_vsi *vsi, return -ENOMEM; new_mac->f = add_head; new_mac->state = add_head->state; + if (add_head->state == I40E_FILTER_NEW) + add_head->state = I40E_FILTER_NEW_SYNC; /* Add the new filter to the tmp list */ hlist_add_head(&new_mac->hlist, tmp_add_list); @@ -2436,7 +2441,8 @@ static int i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, struct i40e_mac_filter *f) { - bool enable = f->state == I40E_FILTER_NEW; + bool enable = f->state == I40E_FILTER_NEW || + f->state == I40E_FILTER_NEW_SYNC; struct i40e_hw *hw = &vsi->back->hw; int aq_ret; @@ -2610,6 +2616,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) /* Add it to the hash list */ hlist_add_head(&new->hlist, &tmp_add_list); + f->state = I40E_FILTER_NEW_SYNC; } /* Count the number of active (current and new) VLAN @@ -2761,7 +2768,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) spin_lock_bh(&vsi->mac_filter_hash_lock); hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { /* Only update the state if we're still NEW */ - if (new->f->state == I40E_FILTER_NEW) + if (new->f->state == I40E_FILTER_NEW || + new->f->state == I40E_FILTER_NEW_SYNC) new->f->state = new->state; hlist_del(&new->hlist); netdev_hw_addr_refcnt(new->f, vsi->netdev, -1); diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index b26f00a836cea..244d10a4dd8b0 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -512,6 +512,25 @@ static void ice_xsk_pool_fill_cb(struct ice_rx_ring *ring) xsk_pool_fill_cb(ring->xsk_pool, &desc); } +/** + * ice_get_frame_sz - calculate xdp_buff::frame_sz + * @rx_ring: the ring being configured + * + * Return frame size based on underlying PAGE_SIZE + */ +static unsigned int ice_get_frame_sz(struct ice_rx_ring *rx_ring) +{ + unsigned int frame_sz; + +#if (PAGE_SIZE >= 8192) + frame_sz = rx_ring->rx_buf_len; +#else + frame_sz = ice_rx_pg_size(rx_ring) / 2; +#endif + + return frame_sz; +} + /** * ice_vsi_cfg_rxq - Configure an Rx queue * @ring: the ring being configured @@ -576,7 +595,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) } } - xdp_init_buff(&ring->xdp, ice_rx_pg_size(ring) / 2, &ring->xdp_rxq); + xdp_init_buff(&ring->xdp, ice_get_frame_sz(ring), &ring->xdp_rxq); ring->xdp.data = NULL; ring->xdp_ext.pkt_ctx = &ring->pkt_ctx; err = ice_setup_rx_ctx(ring); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c 
b/drivers/net/ethernet/intel/ice/ice_txrx.c index 8d25b69812698..c9bc3f1add5d3 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -521,30 +521,6 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) return -ENOMEM; } -/** - * ice_rx_frame_truesize - * @rx_ring: ptr to Rx ring - * @size: size - * - * calculate the truesize with taking into the account PAGE_SIZE of - * underlying arch - */ -static unsigned int -ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size) -{ - unsigned int truesize; - -#if (PAGE_SIZE < 8192) - truesize = ice_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ -#else - truesize = rx_ring->rx_offset ? - SKB_DATA_ALIGN(rx_ring->rx_offset + size) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : - SKB_DATA_ALIGN(size); -#endif - return truesize; -} - /** * ice_run_xdp - Executes an XDP program on initialized xdp_buff * @rx_ring: Rx ring @@ -837,16 +813,15 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) if (!dev_page_is_reusable(page)) return false; -#if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ if (unlikely(rx_buf->pgcnt - pagecnt_bias > 1)) return false; -#else +#if (PAGE_SIZE >= 8192) #define ICE_LAST_OFFSET \ - (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048) + (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_3072) if (rx_buf->page_offset > ICE_LAST_OFFSET) return false; -#endif /* PAGE_SIZE < 8192) */ +#endif /* PAGE_SIZE >= 8192) */ /* If we have drained the page fragment pool we need to update * the pagecnt_bias and page count so that we fully restock the @@ -949,12 +924,7 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, struct ice_rx_buf *rx_buf; rx_buf = &rx_ring->rx_buf[ntc]; - rx_buf->pgcnt = -#if (PAGE_SIZE < 8192) - page_count(rx_buf->page); -#else - 0; -#endif + rx_buf->pgcnt = page_count(rx_buf->page); prefetchw(rx_buf->page); if (!size) @@ -1160,11 +1130,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) bool failure; u32 first; - /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ -#if (PAGE_SIZE < 8192) - xdp->frame_sz = ice_rx_frame_truesize(rx_ring, 0); -#endif - xdp_prog = READ_ONCE(rx_ring->xdp_prog); if (xdp_prog) { xdp_ring = rx_ring->xdp_ring; @@ -1223,10 +1188,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) hard_start = page_address(rx_buf->page) + rx_buf->page_offset - offset; xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); -#if (PAGE_SIZE > 4096) - /* At larger PAGE_SIZE, frame_sz depend on len size */ - xdp->frame_sz = ice_rx_frame_truesize(rx_ring, size); -#endif xdp_buff_clear_frags_flag(xdp); } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { break; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 2b00db92b08f5..7f7ea468ffa91 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -3712,9 +3712,7 @@ struct ixgbe_info { #define IXGBE_KRM_LINK_S1(P) ((P) ? 0x8200 : 0x4200) #define IXGBE_KRM_LINK_CTRL_1(P) ((P) ? 0x820C : 0x420C) #define IXGBE_KRM_AN_CNTL_1(P) ((P) ? 0x822C : 0x422C) -#define IXGBE_KRM_AN_CNTL_4(P) ((P) ? 0x8238 : 0x4238) #define IXGBE_KRM_AN_CNTL_8(P) ((P) ? 0x8248 : 0x4248) -#define IXGBE_KRM_PCS_KX_AN(P) ((P) ? 0x9918 : 0x5918) #define IXGBE_KRM_SGMII_CTRL(P) ((P) ? 0x82A0 : 0x42A0) #define IXGBE_KRM_LP_BASE_PAGE_HIGH(P) ((P) ? 0x836C : 0x436C) #define IXGBE_KRM_DSP_TXFFE_STATE_4(P) ((P) ? 
0x8634 : 0x4634) @@ -3724,7 +3722,6 @@ struct ixgbe_info { #define IXGBE_KRM_PMD_FLX_MASK_ST20(P) ((P) ? 0x9054 : 0x5054) #define IXGBE_KRM_TX_COEFF_CTRL_1(P) ((P) ? 0x9520 : 0x5520) #define IXGBE_KRM_RX_ANA_CTL(P) ((P) ? 0x9A00 : 0x5A00) -#define IXGBE_KRM_FLX_TMRS_CTRL_ST31(P) ((P) ? 0x9180 : 0x5180) #define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_DA ~(0x3 << 20) #define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_SR BIT(20) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index aa4bf6c9a2f7c..35c2b9b8bd191 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -1721,59 +1721,9 @@ static s32 ixgbe_setup_sfi_x550a(struct ixgbe_hw *hw, ixgbe_link_speed *speed) return IXGBE_ERR_LINK_SETUP; } - (void)mac->ops.write_iosf_sb_reg(hw, - IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); - - /* change mode enforcement rules to hybrid */ - (void)mac->ops.read_iosf_sb_reg(hw, - IXGBE_KRM_FLX_TMRS_CTRL_ST31(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); - reg_val |= 0x0400; - - (void)mac->ops.write_iosf_sb_reg(hw, - IXGBE_KRM_FLX_TMRS_CTRL_ST31(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); - - /* manually control the config */ - (void)mac->ops.read_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); - reg_val |= 0x20002240; - - (void)mac->ops.write_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); - - /* move the AN base page values */ - (void)mac->ops.read_iosf_sb_reg(hw, - IXGBE_KRM_PCS_KX_AN(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); - reg_val |= 0x1; - - (void)mac->ops.write_iosf_sb_reg(hw, - IXGBE_KRM_PCS_KX_AN(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); - - /* set the AN37 over CB mode */ - (void)mac->ops.read_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_4(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); - reg_val |= 0x20000000; - - (void)mac->ops.write_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_4(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); - - /* restart AN manually */ - (void)mac->ops.read_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); - reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART; - - (void)mac->ops.write_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + status = mac->ops.write_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); /* Toggle port SW reset by AN reset. 
*/ status = ixgbe_restart_an_internal_phy_x550em(hw); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index d761a1235994c..7ea798a4949e2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -481,11 +481,33 @@ mlxsw_sp_ipip_ol_netdev_change_gre6(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry, struct netlink_ext_ack *extack) { + u32 new_kvdl_index, old_kvdl_index = ipip_entry->dip_kvdl_index; + struct in6_addr old_addr6 = ipip_entry->parms.daddr.addr6; struct mlxsw_sp_ipip_parms new_parms; + int err; new_parms = mlxsw_sp_ipip_netdev_parms_init_gre6(ipip_entry->ol_dev); - return mlxsw_sp_ipip_ol_netdev_change_gre(mlxsw_sp, ipip_entry, - &new_parms, extack); + + err = mlxsw_sp_ipv6_addr_kvdl_index_get(mlxsw_sp, + &new_parms.daddr.addr6, + &new_kvdl_index); + if (err) + return err; + ipip_entry->dip_kvdl_index = new_kvdl_index; + + err = mlxsw_sp_ipip_ol_netdev_change_gre(mlxsw_sp, ipip_entry, + &new_parms, extack); + if (err) + goto err_change_gre; + + mlxsw_sp_ipv6_addr_put(mlxsw_sp, &old_addr6); + + return 0; + +err_change_gre: + ipip_entry->dip_kvdl_index = old_kvdl_index; + mlxsw_sp_ipv6_addr_put(mlxsw_sp, &new_parms.daddr.addr6); + return err; } static int diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6102416579c28..c8dacb6053d23 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3131,8 +3131,9 @@ static int nvme_check_ctrl_fabric_info(struct nvme_ctrl *ctrl, struct nvme_id_ct } if (!ctrl->maxcmd) { - dev_err(ctrl->device, "Maximum outstanding commands is 0\n"); - return -EINVAL; + dev_warn(ctrl->device, + "Firmware bug: maximum outstanding commands is 0\n"); + ctrl->maxcmd = ctrl->sqsize + 1; } return 0; diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 1fdba3deef8b7..5d9cf2c363dd3 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -102,7 +102,7 @@ static inline int memcpy_hsa_kernel(void *dst, unsigned long src, size_t count) kvec.iov_base = dst; kvec.iov_len = count; - iov_iter_kvec(&iter, WRITE, &kvec, 1, count); + iov_iter_kvec(&iter, READ, &kvec, 1, count); if (memcpy_hsa_iter(&iter, src, count) < count) return -EIO; return 0; diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index c6e2ca001a895..d2bafc45cde82 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -7302,13 +7302,13 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba, mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys); } mbox->vport = phba->pport; - - rc = lpfc_sli_issue_mbox_wait(phba, mbox, 30); + rc = lpfc_sli_issue_mbox_wait(phba, mbox, LPFC_MBOX_SLI4_CONFIG_TMO); if (rc == MBX_NOT_FINISHED) { rc = 1; goto error; } - + if (rc == MBX_TIMEOUT) + goto error; if (phba->sli_rev == LPFC_SLI_REV4) mp = mbox->ctx_buf; else @@ -7361,7 +7361,10 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba, mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys); } - rc = lpfc_sli_issue_mbox_wait(phba, mbox, 30); + rc = lpfc_sli_issue_mbox_wait(phba, mbox, LPFC_MBOX_SLI4_CONFIG_TMO); + + if (rc == MBX_TIMEOUT) + goto error; if (bf_get(lpfc_mqe_status, &mbox->u.mqe)) { rc = 1; goto error; @@ -7372,8 +7375,10 @@ int lpfc_get_sfp_info_wait(struct lpfc_hba *phba, DMP_SFF_PAGE_A2_SIZE); error: - mbox->ctx_buf = mpsave; - lpfc_mbox_rsrc_cleanup(phba, mbox, MBOX_THD_UNLOCKED); + if (mbox->mbox_flag & LPFC_MBX_WAKE) { + 
mbox->ctx_buf = mpsave; + lpfc_mbox_rsrc_cleanup(phba, mbox, MBOX_THD_UNLOCKED); + } return rc; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index c96737770d09c..bcbd6fda3cf7b 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -175,7 +175,8 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) ndlp->nlp_state, ndlp->fc4_xpt_flags); /* Don't schedule a worker thread event if the vport is going down. */ - if (test_bit(FC_UNLOADING, &vport->load_flag)) { + if (test_bit(FC_UNLOADING, &vport->load_flag) || + !test_bit(HBA_SETUP, &phba->hba_flag)) { spin_lock_irqsave(&ndlp->lock, iflags); ndlp->rport = NULL; diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 5d07044b74bc4..280c3ed164d2a 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -5562,11 +5562,20 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd) iocb = &lpfc_cmd->cur_iocbq; if (phba->sli_rev == LPFC_SLI_REV4) { - pring_s4 = phba->sli4_hba.hdwq[iocb->hba_wqidx].io_wq->pring; - if (!pring_s4) { + /* if the io_wq & pring are gone, the port was reset. */ + if (!phba->sli4_hba.hdwq[iocb->hba_wqidx].io_wq || + !phba->sli4_hba.hdwq[iocb->hba_wqidx].io_wq->pring) { + lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP, + "2877 SCSI Layer I/O Abort Request " + "IO CMPL Status x%x ID %d LUN %llu " + "HBA_SETUP %d\n", FAILED, + cmnd->device->id, + (u64)cmnd->device->lun, + test_bit(HBA_SETUP, &phba->hba_flag)); ret = FAILED; goto out_unlock_hba; } + pring_s4 = phba->sli4_hba.hdwq[iocb->hba_wqidx].io_wq->pring; spin_lock(&pring_s4->ring_lock); } /* the command is in process of being cancelled */ diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index a6bdc3689a17d..8aafda1fe7280 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -4689,6 +4689,17 @@ lpfc_sli_flush_io_rings(struct lpfc_hba *phba) /* Look on all the FCP Rings for the iotag */ if (phba->sli_rev >= LPFC_SLI_REV4) { for (i = 0; i < phba->cfg_hdw_queue; i++) { + if (!phba->sli4_hba.hdwq || + !phba->sli4_hba.hdwq[i].io_wq) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "7777 hdwq's deleted %lx " + "%lx %x %x\n", + phba->pport->load_flag, + phba->hba_flag, + phba->link_state, + phba->sli.sli_flag); + return; + } pring = phba->sli4_hba.hdwq[i].io_wq->pring; spin_lock_irq(&pring->ring_lock); diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index bfb58f009e9df..6e8bdf130bb59 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -149,6 +149,8 @@ struct hv_fc_wwn_packet { */ static int vmstor_proto_version; +static bool hv_dev_is_fc(struct hv_device *hv_dev); + #define STORVSC_LOGGING_NONE 0 #define STORVSC_LOGGING_ERROR 1 #define STORVSC_LOGGING_WARN 2 @@ -316,6 +318,9 @@ enum storvsc_request_type { #define SRB_STATUS_ABORTED 0x02 #define SRB_STATUS_ERROR 0x04 #define SRB_STATUS_INVALID_REQUEST 0x06 +#define SRB_STATUS_TIMEOUT 0x09 +#define SRB_STATUS_SELECTION_TIMEOUT 0x0A +#define SRB_STATUS_BUS_RESET 0x0E #define SRB_STATUS_DATA_OVERRUN 0x12 #define SRB_STATUS_INVALID_LUN 0x20 #define SRB_STATUS_INTERNAL_ERROR 0x30 @@ -981,6 +986,10 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, case SRB_STATUS_ABORTED: case SRB_STATUS_INVALID_REQUEST: case SRB_STATUS_INTERNAL_ERROR: + case SRB_STATUS_TIMEOUT: + case SRB_STATUS_SELECTION_TIMEOUT: + case SRB_STATUS_BUS_RESET: + case SRB_STATUS_DATA_OVERRUN: if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) { /* Check for 
capacity change */ if ((asc == 0x2a) && (ascq == 0x9)) { @@ -1130,6 +1139,7 @@ static void storvsc_on_io_completion(struct storvsc_device *stor_device, * not correctly handle: * INQUIRY command with page code parameter set to 0x80 * MODE_SENSE command with cmd[2] == 0x1c + * MAINTENANCE_IN is not supported by HyperV FC passthrough * * Setup srb and scsi status so this won't be fatal. * We do this so we can distinguish truly fatal failues @@ -1137,7 +1147,9 @@ static void storvsc_on_io_completion(struct storvsc_device *stor_device, */ if ((stor_pkt->vm_srb.cdb[0] == INQUIRY) || - (stor_pkt->vm_srb.cdb[0] == MODE_SENSE)) { + (stor_pkt->vm_srb.cdb[0] == MODE_SENSE) || + (stor_pkt->vm_srb.cdb[0] == MAINTENANCE_IN && + hv_dev_is_fc(device))) { vstor_packet->vm_srb.scsi_status = 0; vstor_packet->vm_srb.srb_status = SRB_STATUS_SUCCESS; } diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 1bbbe490caec7..7016d23cf4bcc 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -342,7 +342,7 @@ static const struct sysrq_key_op sysrq_ftrace_dump_op = { static void sysrq_handle_showmem(int key) { - show_mem(0, NULL); + show_mem(); } static const struct sysrq_key_op sysrq_showmem_op = { .handler = sysrq_handle_showmem, diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 4ab925a054540..6e8eeffbdae40 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -606,7 +606,7 @@ static void fn_scroll_back(struct vc_data *vc) static void fn_show_mem(struct vc_data *vc) { - show_mem(0, NULL); + show_mem(); } static void fn_show_state(struct vc_data *vc) diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index d1e04018fedd2..be1d54dfc38b0 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -29,8 +29,6 @@ #include #include -#include "sev-guest.h" - #define DEVICE_NAME "sev-guest" #define AAD_LEN 48 #define MSG_HDR_VER 1 diff --git a/drivers/virt/coco/sev-guest/sev-guest.h b/drivers/virt/coco/sev-guest/sev-guest.h deleted file mode 100644 index 21bda26fdb953..0000000000000 --- a/drivers/virt/coco/sev-guest/sev-guest.h +++ /dev/null @@ -1,63 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2021 Advanced Micro Devices, Inc. 
- * - * Author: Brijesh Singh - * - * SEV-SNP API spec is available at https://developer.amd.com/sev - */ - -#ifndef __VIRT_SEVGUEST_H__ -#define __VIRT_SEVGUEST_H__ - -#include - -#define MAX_AUTHTAG_LEN 32 - -/* See SNP spec SNP_GUEST_REQUEST section for the structure */ -enum msg_type { - SNP_MSG_TYPE_INVALID = 0, - SNP_MSG_CPUID_REQ, - SNP_MSG_CPUID_RSP, - SNP_MSG_KEY_REQ, - SNP_MSG_KEY_RSP, - SNP_MSG_REPORT_REQ, - SNP_MSG_REPORT_RSP, - SNP_MSG_EXPORT_REQ, - SNP_MSG_EXPORT_RSP, - SNP_MSG_IMPORT_REQ, - SNP_MSG_IMPORT_RSP, - SNP_MSG_ABSORB_REQ, - SNP_MSG_ABSORB_RSP, - SNP_MSG_VMRK_REQ, - SNP_MSG_VMRK_RSP, - - SNP_MSG_TYPE_MAX -}; - -enum aead_algo { - SNP_AEAD_INVALID, - SNP_AEAD_AES_256_GCM, -}; - -struct snp_guest_msg_hdr { - u8 authtag[MAX_AUTHTAG_LEN]; - u64 msg_seqno; - u8 rsvd1[8]; - u8 algo; - u8 hdr_version; - u16 hdr_sz; - u8 msg_type; - u8 msg_version; - u16 msg_sz; - u32 rsvd2; - u8 msg_vmpck; - u8 rsvd3[35]; -} __packed; - -struct snp_guest_msg { - struct snp_guest_msg_hdr hdr; - u8 payload[4000]; -} __packed; - -#endif /* __VIRT_SEVGUEST_H__ */ diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 759718df7d905..d50b9d4653ad5 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -573,11 +574,11 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) gl->gl_tchange = jiffies; } -static void gfs2_set_demote(struct gfs2_glock *gl) +static void gfs2_set_demote(int nr, struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - set_bit(GLF_DEMOTE, &gl->gl_flags); + set_bit(nr, &gl->gl_flags); smp_mb(); wake_up(&sdp->sd_async_glock_wait); } @@ -971,20 +972,22 @@ static void gfs2_glock_poke(struct gfs2_glock *gl) gfs2_holder_uninit(&gh); } -static bool gfs2_try_evict(struct gfs2_glock *gl) +static void gfs2_try_evict(struct gfs2_glock *gl) { struct gfs2_inode *ip; - bool evicted = false; /* * If there is contention on the iopen glock and we have an inode, try - * to grab and release the inode so that it can be evicted. This will - * allow the remote node to go ahead and delete the inode without us - * having to do it, which will avoid rgrp glock thrashing. + * to grab and release the inode so that it can be evicted. The + * GIF_DEFER_DELETE flag indicates to gfs2_evict_inode() that the inode + * should not be deleted locally. This will allow the remote node to + * go ahead and delete the inode without us having to do it, which will + * avoid rgrp glock thrashing. * * The remote node is likely still holding the corresponding inode * glock, so it will run before we get to verify that the delete has - * happened below. + * happened below. (Verification is triggered by the call to + * gfs2_queue_verify_delete() in gfs2_evict_inode().) 
*/ spin_lock(&gl->gl_lockref.lock); ip = gl->gl_object; @@ -992,8 +995,14 @@ static bool gfs2_try_evict(struct gfs2_glock *gl) ip = NULL; spin_unlock(&gl->gl_lockref.lock); if (ip) { - gl->gl_no_formal_ino = ip->i_no_formal_ino; - set_bit(GIF_DEFERRED_DELETE, &ip->i_flags); + wait_on_inode(&ip->i_inode); + if (is_bad_inode(&ip->i_inode)) { + iput(&ip->i_inode); + ip = NULL; + } + } + if (ip) { + set_bit(GIF_DEFER_DELETE, &ip->i_flags); d_prune_aliases(&ip->i_inode); iput(&ip->i_inode); @@ -1001,7 +1010,7 @@ static bool gfs2_try_evict(struct gfs2_glock *gl) spin_lock(&gl->gl_lockref.lock); ip = gl->gl_object; if (ip) { - clear_bit(GIF_DEFERRED_DELETE, &ip->i_flags); + clear_bit(GIF_DEFER_DELETE, &ip->i_flags); if (!igrab(&ip->i_inode)) ip = NULL; } @@ -1010,9 +1019,7 @@ static bool gfs2_try_evict(struct gfs2_glock *gl) gfs2_glock_poke(ip->i_gl); iput(&ip->i_inode); } - evicted = !ip; } - return evicted; } bool gfs2_queue_try_to_evict(struct gfs2_glock *gl) @@ -1021,18 +1028,18 @@ bool gfs2_queue_try_to_evict(struct gfs2_glock *gl) if (test_and_set_bit(GLF_TRY_TO_EVICT, &gl->gl_flags)) return false; - return queue_delayed_work(sdp->sd_delete_wq, - &gl->gl_delete, 0); + return !mod_delayed_work(sdp->sd_delete_wq, &gl->gl_delete, 0); } -static bool gfs2_queue_verify_evict(struct gfs2_glock *gl) +bool gfs2_queue_verify_delete(struct gfs2_glock *gl, bool later) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + unsigned long delay; - if (test_and_set_bit(GLF_VERIFY_EVICT, &gl->gl_flags)) + if (test_and_set_bit(GLF_VERIFY_DELETE, &gl->gl_flags)) return false; - return queue_delayed_work(sdp->sd_delete_wq, - &gl->gl_delete, 5 * HZ); + delay = later ? HZ + get_random_long() % (HZ * 9) : 0; + return queue_delayed_work(sdp->sd_delete_wq, &gl->gl_delete, delay); } static void delete_work_func(struct work_struct *work) @@ -1040,43 +1047,21 @@ static void delete_work_func(struct work_struct *work) struct delayed_work *dwork = to_delayed_work(work); struct gfs2_glock *gl = container_of(dwork, struct gfs2_glock, gl_delete); struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - struct inode *inode; - u64 no_addr = gl->gl_name.ln_number; + bool verify_delete = test_and_clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags); - if (test_and_clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags)) { - /* - * If we can evict the inode, give the remote node trying to - * delete the inode some time before verifying that the delete - * has happened. Otherwise, if we cause contention on the inode glock - * immediately, the remote node will think that we still have - * the inode in use, and so it will give up waiting. - * - * If we can't evict the inode, signal to the remote node that - * the inode is still in use. We'll later try to delete the - * inode locally in gfs2_evict_inode. - * - * FIXME: We only need to verify that the remote node has - * deleted the inode because nodes before this remote delete - * rework won't cooperate. At a later time, when we no longer - * care about compatibility with such nodes, we can skip this - * step entirely. 
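The randomized delay introduced in gfs2_queue_verify_delete() above spreads deferred verification over a one-to-ten-second window, which helps keep many nodes from re-checking the same unlinked inode at the same instant. A standalone sketch of that arithmetic (the HZ value here is an assumption for the example; the kernel's is configuration dependent):

#include <stdio.h>

#define HZ 250	/* illustrative; CONFIG_HZ in a real kernel */

static unsigned long verify_delay(int later, unsigned long rnd)
{
	/* later: back off between HZ and 10 * HZ - 1 jiffies; else run now */
	return later ? HZ + rnd % (HZ * 9) : 0;
}

int main(void)
{
	printf("min %lu, max %lu jiffies\n",
	       verify_delay(1, 0), verify_delay(1, HZ * 9 - 1));
	printf("immediate: %lu\n", verify_delay(0, 12345));
	return 0;
}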
- */ - if (gfs2_try_evict(gl)) { - if (test_bit(SDF_DEACTIVATING, &sdp->sd_flags)) - goto out; - if (gfs2_queue_verify_evict(gl)) - return; - } - goto out; - } + if (test_and_clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags)) + gfs2_try_evict(gl); + + if (verify_delete) { + u64 no_addr = gl->gl_name.ln_number; + struct inode *inode; - if (test_and_clear_bit(GLF_VERIFY_EVICT, &gl->gl_flags)) { inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino, GFS2_BLKST_UNLINKED); if (IS_ERR(inode)) { if (PTR_ERR(inode) == -EAGAIN && - !test_bit(SDF_DEACTIVATING, &sdp->sd_flags) && - gfs2_queue_verify_evict(gl)) + !test_bit(SDF_KILL, &sdp->sd_flags) && + gfs2_queue_verify_delete(gl, true)) return; } else { d_prune_aliases(inode); @@ -1084,7 +1069,6 @@ static void delete_work_func(struct work_struct *work) } } -out: gfs2_glock_put(gl); } @@ -1111,7 +1095,7 @@ static void glock_work_func(struct work_struct *work) if (!delay) { clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); - gfs2_set_demote(gl); + gfs2_set_demote(GLF_DEMOTE, gl); } } run_queue(gl, 0); @@ -1458,10 +1442,7 @@ int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs) static void handle_callback(struct gfs2_glock *gl, unsigned int state, unsigned long delay, bool remote) { - if (delay) - set_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); - else - gfs2_set_demote(gl); + gfs2_set_demote(delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE, gl); if (gl->gl_demote_state == LM_ST_EXCLUSIVE) { gl->gl_demote_state = state; gl->gl_demote_time = jiffies; @@ -1642,12 +1623,6 @@ int gfs2_glock_poll(struct gfs2_holder *gh) return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1; } -static inline bool needs_demote(struct gfs2_glock *gl) -{ - return (test_bit(GLF_DEMOTE, &gl->gl_flags) || - test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags)); -} - static void __gfs2_glock_dq(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; @@ -1656,8 +1631,8 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh) /* * This holder should not be cached, so mark it for demote. - * Note: this should be done before the check for needs_demote - * below. + * Note: this should be done before the glock_needs_demote + * check below. */ if (gh->gh_flags & GL_NOCACHE) handle_callback(gl, LM_ST_UNLOCKED, 0, false); @@ -1670,7 +1645,7 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh) * If there hasn't been a demote request we are done. * (Let the remaining holders, if any, keep holding it.) 
*/ - if (!needs_demote(gl)) { + if (!glock_needs_demote(gl)) { if (list_empty(&gl->gl_holders)) fast_path = 1; } @@ -2127,7 +2102,7 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) void gfs2_cancel_delete_work(struct gfs2_glock *gl) { clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags); - clear_bit(GLF_VERIFY_EVICT, &gl->gl_flags); + clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags); if (cancel_delayed_work(&gl->gl_delete)) gfs2_glock_put(gl); } @@ -2366,7 +2341,7 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) *p++ = 'N'; if (test_bit(GLF_TRY_TO_EVICT, gflags)) *p++ = 'e'; - if (test_bit(GLF_VERIFY_EVICT, gflags)) + if (test_bit(GLF_VERIFY_DELETE, gflags)) *p++ = 'E'; *p = 0; return buf; diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 944705747cd37..244bb7f68576d 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -268,6 +268,7 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl, extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret); extern bool gfs2_queue_try_to_evict(struct gfs2_glock *gl); +extern bool gfs2_queue_verify_delete(struct gfs2_glock *gl, bool later); extern void gfs2_cancel_delete_work(struct gfs2_glock *gl); extern void gfs2_flush_delete_work(struct gfs2_sbd *sdp); extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); @@ -308,4 +309,10 @@ static inline bool gfs2_holder_queued(struct gfs2_holder *gh) extern void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation); extern bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation); +static inline bool glock_needs_demote(struct gfs2_glock *gl) +{ + return (test_bit(GLF_DEMOTE, &gl->gl_flags) || + test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags)); +} + #endif /* __GLOCK_DOT_H__ */ diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 3f8b5900c6b10..1b13f904a51db 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -499,11 +499,18 @@ int gfs2_inode_refresh(struct gfs2_inode *ip) static int inode_go_instantiate(struct gfs2_glock *gl) { struct gfs2_inode *ip = gl->gl_object; + struct gfs2_glock *io_gl; + int error; if (!ip) /* no inode to populate - read it in later */ return 0; - return gfs2_inode_refresh(ip); + error = gfs2_inode_refresh(ip); + if (error) + return error; + io_gl = ip->i_iopen_gh.gh_gl; + io_gl->gl_no_formal_ino = ip->i_no_formal_ino; + return 0; } static int inode_go_held(struct gfs2_holder *gh) @@ -640,7 +647,7 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote) struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; if (!remote || sb_rdonly(sdp->sd_vfs) || - test_bit(SDF_DEACTIVATING, &sdp->sd_flags)) + test_bit(SDF_KILL, &sdp->sd_flags)) return; if (gl->gl_demote_state == LM_ST_UNLOCKED && diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 15321b1db66e5..4d1d85a9d10b9 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -331,7 +331,7 @@ enum { GLF_BLOCKING = 15, GLF_FREEING = 16, /* Wait for glock to be freed */ GLF_TRY_TO_EVICT = 17, /* iopen glocks only */ - GLF_VERIFY_EVICT = 18, /* iopen glocks only */ + GLF_VERIFY_DELETE = 18, /* iopen glocks only */ }; struct gfs2_glock { @@ -378,7 +378,7 @@ enum { GIF_SW_PAGED = 3, GIF_FREE_VFS_INODE = 5, GIF_GLOP_PENDING = 6, - GIF_DEFERRED_DELETE = 7, + GIF_DEFER_DELETE = 7, }; struct gfs2_inode { @@ -607,7 +607,7 @@ enum { SDF_REMOTE_WITHDRAW = 13, /* Performing remote recovery */ SDF_WITHDRAW_RECOVERY = 14, /* Wait for journal recovery when we are withdrawing */ - SDF_DEACTIVATING = 15, + 
SDF_KILL = 15, SDF_EVICTING = 16, SDF_FROZEN = 17, }; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index d5575918ab691..03a6bcaf71c15 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -742,6 +742,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_free_inode; gfs2_cancel_delete_work(io_gl); + io_gl->gl_no_formal_ino = ip->i_no_formal_ino; retry: error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 54f5b7d85d6ea..8a3ba0963b7a3 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1788,9 +1788,9 @@ static void gfs2_kill_sb(struct super_block *sb) /* * Flush and then drain the delete workqueue here (via * destroy_workqueue()) to ensure that any delete work that - * may be running will also see the SDF_DEACTIVATING flag. + * may be running will also see the SDF_KILL flag. */ - set_bit(SDF_DEACTIVATING, &sdp->sd_flags); + set_bit(SDF_KILL, &sdp->sd_flags); gfs2_flush_delete_work(sdp); destroy_workqueue(sdp->sd_delete_wq); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c79c74121ec08..7df5f97de1671 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1881,7 +1881,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip */ ip = gl->gl_object; - if (ip || !gfs2_queue_try_to_evict(gl)) + if (ip || !gfs2_queue_verify_delete(gl, false)) gfs2_glock_put(gl); else found++; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 1b16abc441c2a..db5abcc0f615a 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -44,10 +44,10 @@ #include "xattr.h" #include "lops.h" -enum dinode_demise { - SHOULD_DELETE_DINODE, - SHOULD_NOT_DELETE_DINODE, - SHOULD_DEFER_EVICTION, +enum evict_behavior { + EVICT_SHOULD_DELETE, + EVICT_SHOULD_SKIP_DELETE, + EVICT_SHOULD_DEFER_DELETE, }; /** @@ -550,7 +550,7 @@ void gfs2_make_fs_ro(struct gfs2_sbd *sdp) { int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - if (!test_bit(SDF_DEACTIVATING, &sdp->sd_flags)) + if (!test_bit(SDF_KILL, &sdp->sd_flags)) gfs2_flush_delete_work(sdp); if (!log_write_allowed && current == sdp->sd_quotad_process) @@ -1038,7 +1038,7 @@ static int gfs2_drop_inode(struct inode *inode) if (inode->i_nlink && gfs2_holder_initialized(&ip->i_iopen_gh)) { struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; - if (test_bit(GLF_DEMOTE, &gl->gl_flags)) + if (glock_needs_demote(gl)) clear_nlink(inode); } @@ -1053,7 +1053,7 @@ static int gfs2_drop_inode(struct inode *inode) struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; gfs2_glock_hold(gl); - if (!gfs2_queue_try_to_evict(gl)) + if (!gfs2_queue_verify_delete(gl, true)) gfs2_glock_put_async(gl); return 0; } @@ -1271,12 +1271,11 @@ static void gfs2_glock_put_eventually(struct gfs2_glock *gl) gfs2_glock_put(gl); } -static bool gfs2_upgrade_iopen_glock(struct inode *inode) +static enum evict_behavior gfs2_upgrade_iopen_glock(struct inode *inode) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_holder *gh = &ip->i_iopen_gh; - long timeout = 5 * HZ; int error; gh->gh_flags |= GL_NOCACHE; @@ -1287,9 +1286,9 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode) * exclusive access to the iopen glock here. * * Otherwise, the other nodes holding the lock will be notified about - * our locking request. If they do not have the inode open, they are - * expected to evict the cached inode and release the lock, allowing us - * to proceed. + * our locking request (see iopen_go_callback()). 
If they do not have + * the inode open, they are expected to evict the cached inode and + * release the lock, allowing us to proceed. * * Otherwise, if they cannot evict the inode, they are expected to poke * the inode glock (note: not the iopen glock). We will notice that @@ -1300,32 +1299,27 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode) * As a last resort, if another node keeps holding the iopen glock * without showing any activity on the inode glock, we will eventually * time out and fail the iopen glock upgrade. - * - * Note that we're passing the LM_FLAG_TRY_1CB flag to the first - * locking request as an optimization to notify lock holders as soon as - * possible. Without that flag, they'd be notified implicitly by the - * second locking request. */ - gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, gh); - error = gfs2_glock_nq(gh); - if (error != GLR_TRYFAILED) - return !error; - gfs2_holder_reinit(LM_ST_EXCLUSIVE, GL_ASYNC | GL_NOCACHE, gh); error = gfs2_glock_nq(gh); if (error) - return false; + return EVICT_SHOULD_SKIP_DELETE; - timeout = wait_event_interruptible_timeout(sdp->sd_async_glock_wait, + wait_event_interruptible_timeout(sdp->sd_async_glock_wait, !test_bit(HIF_WAIT, &gh->gh_iflags) || - test_bit(GLF_DEMOTE, &ip->i_gl->gl_flags), - timeout); + glock_needs_demote(ip->i_gl), + 5 * HZ); if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) { gfs2_glock_dq(gh); - return false; + if (glock_needs_demote(ip->i_gl)) + return EVICT_SHOULD_SKIP_DELETE; + return EVICT_SHOULD_DEFER_DELETE; } - return gfs2_glock_holder_ready(gh) == 0; + error = gfs2_glock_holder_ready(gh); + if (error) + return EVICT_SHOULD_SKIP_DELETE; + return EVICT_SHOULD_DELETE; } /** @@ -1338,8 +1332,8 @@ static bool gfs2_upgrade_iopen_glock(struct inode *inode) * * Returns: the fate of the dinode */ -static enum dinode_demise evict_should_delete(struct inode *inode, - struct gfs2_holder *gh) +static enum evict_behavior evict_should_delete(struct inode *inode, + struct gfs2_holder *gh) { struct gfs2_inode *ip = GFS2_I(inode); struct super_block *sb = inode->i_sb; @@ -1349,12 +1343,12 @@ static enum dinode_demise evict_should_delete(struct inode *inode, if (unlikely(test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) goto should_delete; - if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags)) - return SHOULD_DEFER_EVICTION; + if (test_bit(GIF_DEFER_DELETE, &ip->i_flags)) + return EVICT_SHOULD_DEFER_DELETE; /* Deletes should never happen under memory pressure anymore. */ if (WARN_ON_ONCE(current->flags & PF_MEMALLOC)) - return SHOULD_DEFER_EVICTION; + return EVICT_SHOULD_DEFER_DELETE; /* Must not read inode block until block type has been verified */ ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, gh); @@ -1362,36 +1356,39 @@ static enum dinode_demise evict_should_delete(struct inode *inode, glock_clear_object(ip->i_iopen_gh.gh_gl, ip); ip->i_iopen_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_uninit(&ip->i_iopen_gh); - return SHOULD_DEFER_EVICTION; + return EVICT_SHOULD_DEFER_DELETE; } if (gfs2_inode_already_deleted(ip->i_gl, ip->i_no_formal_ino)) - return SHOULD_NOT_DELETE_DINODE; + return EVICT_SHOULD_SKIP_DELETE; ret = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED); if (ret) - return SHOULD_NOT_DELETE_DINODE; + return EVICT_SHOULD_SKIP_DELETE; if (test_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags)) { ret = gfs2_instantiate(gh); if (ret) - return SHOULD_NOT_DELETE_DINODE; + return EVICT_SHOULD_SKIP_DELETE; } /* * The inode may have been recreated in the meantime. 
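For orientation, a compact model of how gfs2_evict_inode() dispatches on the tri-state result introduced in this series (a sketch only; the real code also holds and requeues the iopen glock before deferring):

#include <stdio.h>

enum evict_behavior {
	EVICT_SHOULD_DELETE,
	EVICT_SHOULD_SKIP_DELETE,
	EVICT_SHOULD_DEFER_DELETE,
};

static const char *evict(enum evict_behavior b, int fs_being_killed)
{
	if (b == EVICT_SHOULD_DEFER_DELETE && !fs_being_killed)
		return "requeue verification, evict later";
	if (b == EVICT_SHOULD_DEFER_DELETE)
		b = EVICT_SHOULD_DELETE;	/* nobody left to defer to */
	return b == EVICT_SHOULD_DELETE ? "delete dinode" : "skip delete";
}

int main(void)
{
	printf("%s\n", evict(EVICT_SHOULD_DEFER_DELETE, 0));
	printf("%s\n", evict(EVICT_SHOULD_DEFER_DELETE, 1));
	printf("%s\n", evict(EVICT_SHOULD_SKIP_DELETE, 0));
	return 0;
}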
*/ if (inode->i_nlink) - return SHOULD_NOT_DELETE_DINODE; + return EVICT_SHOULD_SKIP_DELETE; should_delete: if (gfs2_holder_initialized(&ip->i_iopen_gh) && test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { - if (!gfs2_upgrade_iopen_glock(inode)) { + enum evict_behavior behavior = + gfs2_upgrade_iopen_glock(inode); + + if (behavior != EVICT_SHOULD_DELETE) { gfs2_holder_uninit(&ip->i_iopen_gh); - return SHOULD_NOT_DELETE_DINODE; + return behavior; } } - return SHOULD_DELETE_DINODE; + return EVICT_SHOULD_DELETE; } /** @@ -1502,6 +1499,7 @@ static void gfs2_evict_inode(struct inode *inode) struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; + enum evict_behavior behavior; int ret; if (inode->i_nlink || sb_rdonly(sb) || !ip->i_no_addr) @@ -1516,10 +1514,20 @@ static void gfs2_evict_inode(struct inode *inode) goto out; gfs2_holder_mark_uninitialized(&gh); - ret = evict_should_delete(inode, &gh); - if (ret == SHOULD_DEFER_EVICTION) - goto out; - if (ret == SHOULD_DELETE_DINODE) + behavior = evict_should_delete(inode, &gh); + if (behavior == EVICT_SHOULD_DEFER_DELETE && + !test_bit(SDF_KILL, &sdp->sd_flags)) { + struct gfs2_glock *io_gl = ip->i_iopen_gh.gh_gl; + + if (io_gl) { + gfs2_glock_hold(io_gl); + if (!gfs2_queue_verify_delete(io_gl, true)) + gfs2_glock_put(io_gl); + goto out; + } + behavior = EVICT_SHOULD_DELETE; + } + if (behavior == EVICT_SHOULD_DELETE) ret = evict_unlinked_inode(inode); else ret = evict_linked_inode(inode); diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index c60bc7f628e19..3edee498ad0a7 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -118,7 +118,7 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) test_bit(SDF_WITHDRAW_IN_PROG, &f), test_bit(SDF_REMOTE_WITHDRAW, &f), test_bit(SDF_WITHDRAW_RECOVERY, &f), - test_bit(SDF_DEACTIVATING, &f), + test_bit(SDF_KILL, &f), sdp->sd_log_error, rwsem_is_locked(&sdp->sd_log_flush_lock), sdp->sd_log_num_revoke, diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 732169d8a67a3..36a2cff134d27 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -37,8 +37,6 @@ extern struct mid_q_entry *smb2_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst); extern struct mid_q_entry *smb2_setup_async_request( struct TCP_Server_Info *server, struct smb_rqst *rqst); -extern struct cifs_ses *smb2_find_smb_ses(struct TCP_Server_Info *server, - __u64 ses_id); extern struct cifs_tcon *smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid); extern int smb2_calc_signature(struct smb_rqst *rqst, diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index a6218715984e6..51129bd3838de 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -74,7 +74,7 @@ smb311_crypto_shash_allocate(struct TCP_Server_Info *server) static -int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) +int smb3_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) { struct cifs_chan *chan; struct TCP_Server_Info *pserver; @@ -168,16 +168,41 @@ smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id) return NULL; } -struct cifs_ses * -smb2_find_smb_ses(struct TCP_Server_Info *server, __u64 ses_id) +static int smb2_get_sign_key(struct TCP_Server_Info *server, + __u64 ses_id, u8 *key) { struct cifs_ses *ses; + int rc = -ENOENT; + + if (SERVER_IS_CHAN(server)) + server = server->primary_server; spin_lock(&cifs_tcp_ses_lock); - ses = smb2_find_smb_ses_unlocked(server, 
ses_id); - spin_unlock(&cifs_tcp_ses_lock); + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (ses->Suid != ses_id) + continue; - return ses; + rc = 0; + spin_lock(&ses->ses_lock); + switch (ses->ses_status) { + case SES_EXITING: /* SMB2_LOGOFF */ + case SES_GOOD: + if (likely(ses->auth_key.response)) { + memcpy(key, ses->auth_key.response, + SMB2_NTLMV2_SESSKEY_SIZE); + } else { + rc = -EIO; + } + break; + default: + rc = -EAGAIN; + break; + } + spin_unlock(&ses->ses_lock); + break; + } + spin_unlock(&cifs_tcp_ses_lock); + return rc; } static struct cifs_tcon * @@ -234,14 +259,16 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, unsigned char *sigptr = smb2_signature; struct kvec *iov = rqst->rq_iov; struct smb2_hdr *shdr = (struct smb2_hdr *)iov[0].iov_base; - struct cifs_ses *ses; struct shash_desc *shash = NULL; struct smb_rqst drqst; + __u64 sid = le64_to_cpu(shdr->SessionId); + u8 key[SMB2_NTLMV2_SESSKEY_SIZE]; - ses = smb2_find_smb_ses(server, le64_to_cpu(shdr->SessionId)); - if (unlikely(!ses)) { - cifs_server_dbg(FYI, "%s: Could not find session\n", __func__); - return -ENOENT; + rc = smb2_get_sign_key(server, sid, key); + if (unlikely(rc)) { + cifs_server_dbg(FYI, "%s: [sesid=0x%llx] couldn't find signing key: %d\n", + __func__, sid, rc); + return rc; } memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE); @@ -258,8 +285,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, shash = server->secmech.hmacsha256; } - rc = crypto_shash_setkey(shash->tfm, ses->auth_key.response, - SMB2_NTLMV2_SESSKEY_SIZE); + rc = crypto_shash_setkey(shash->tfm, key, sizeof(key)); if (rc) { cifs_server_dbg(VFS, "%s: Could not update with response\n", @@ -301,8 +327,6 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, out: if (allocate_crypto) cifs_free_hash(&shash); - if (ses) - cifs_put_smb_ses(ses); return rc; } @@ -568,7 +592,7 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, struct smb_rqst drqst; u8 key[SMB3_SIGN_KEY_SIZE]; - rc = smb2_get_sign_key(le64_to_cpu(shdr->SessionId), server, key); + rc = smb3_get_sign_key(le64_to_cpu(shdr->SessionId), server, key); if (unlikely(rc)) { cifs_server_dbg(FYI, "%s: Could not get signing key\n", __func__); return rc; diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index d4afa8508a806..601055cec9088 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -80,10 +80,12 @@ static __always_inline bool context_tracking_guest_enter(void) return context_tracking_enabled_this_cpu(); } -static __always_inline void context_tracking_guest_exit(void) +static __always_inline bool context_tracking_guest_exit(void) { if (context_tracking_enabled()) __ct_user_exit(CONTEXT_GUEST); + + return context_tracking_enabled_this_cpu(); } #define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond)) @@ -97,7 +99,7 @@ static inline int exception_enter(void) { return 0; } static inline void exception_exit(enum ctx_state prev_ctx) { } static inline int ct_state(void) { return -1; } static __always_inline bool context_tracking_guest_enter(void) { return false; } -static inline void context_tracking_guest_exit(void) { } +static __always_inline bool context_tracking_guest_exit(void) { return false; } #define CT_WARN_ON(cond) do { } while (0) #endif /* !CONFIG_CONTEXT_TRACKING_USER */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8cbef6ec64226..0974ca3dd5971 100644 --- 
a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -485,7 +485,15 @@ static __always_inline void guest_state_enter_irqoff(void) */ static __always_inline void guest_context_exit_irqoff(void) { - context_tracking_guest_exit(); + /* + * Guest mode is treated as a quiescent state, see + * guest_context_enter_irqoff() for more details. + */ + if (!context_tracking_guest_exit()) { + instrumentation_begin(); + rcu_virt_note_context_switch(); + instrumentation_end(); + } } /* diff --git a/include/linux/mm.h b/include/linux/mm.h index d36af5babad14..e1f2365802c07 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2269,18 +2269,6 @@ extern void pagefault_out_of_memory(void); #define offset_in_thp(page, p) ((unsigned long)(p) & (thp_size(page) - 1)) #define offset_in_folio(folio, p) ((unsigned long)(p) & (folio_size(folio) - 1)) -/* - * Flags passed to show_mem() and show_free_areas() to suppress output in - * various contexts. - */ -#define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ - -extern void __show_free_areas(unsigned int flags, nodemask_t *nodemask, int max_zone_idx); -static void __maybe_unused show_free_areas(unsigned int flags, nodemask_t *nodemask) -{ - __show_free_areas(flags, nodemask, MAX_NR_ZONES - 1); -} - /* * Parameter block passed down to zap_pte_range in exceptional cases. */ @@ -3046,9 +3034,9 @@ extern void mem_init(void); extern void __init mmap_init(void); extern void __show_mem(unsigned int flags, nodemask_t *nodemask, int max_zone_idx); -static inline void show_mem(unsigned int flags, nodemask_t *nodemask) +static inline void show_mem(void) { - __show_mem(flags, nodemask, MAX_NR_ZONES - 1); + __show_mem(0, NULL, MAX_NR_ZONES - 1); } extern long si_mem_available(void); extern void si_meminfo(struct sysinfo * val); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f3d24766182f1..702cf3281791c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1562,6 +1562,7 @@ struct nft_trans_rule { (((struct nft_trans_rule *)trans->data)->bound) struct nft_trans_set { + struct list_head list_trans_newset; struct nft_set *set; u32 set_id; u32 gc_int; @@ -1710,6 +1711,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) 
{ return -ENOENT; } struct nftables_pernet { struct list_head tables; struct list_head commit_list; + struct list_head commit_set_list; struct list_head binding_list; struct list_head module_list; struct list_head notify_list; diff --git a/include/sound/sof.h b/include/sound/sof.h index 05213bb515a38..2a1ef39a20b89 100644 --- a/include/sound/sof.h +++ b/include/sound/sof.h @@ -173,5 +173,6 @@ struct sof_dev_desc { int sof_dai_get_mclk(struct snd_soc_pcm_runtime *rtd); int sof_dai_get_bclk(struct snd_soc_pcm_runtime *rtd); +int sof_dai_get_tdm_slots(struct snd_soc_pcm_runtime *rtd); #endif diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h index 154a87a1eca97..fcdfea767fca8 100644 --- a/include/uapi/linux/sev-guest.h +++ b/include/uapi/linux/sev-guest.h @@ -89,6 +89,9 @@ struct snp_ext_report_req { #define SNP_GUEST_FW_ERR_MASK GENMASK_ULL(31, 0) #define SNP_GUEST_VMM_ERR_SHIFT 32 #define SNP_GUEST_VMM_ERR(x) (((u64)x) << SNP_GUEST_VMM_ERR_SHIFT) +#define SNP_GUEST_FW_ERR(x) ((x) & SNP_GUEST_FW_ERR_MASK) +#define SNP_GUEST_ERR(vmm_err, fw_err) (SNP_GUEST_VMM_ERR(vmm_err) | \ + SNP_GUEST_FW_ERR(fw_err)) #define SNP_GUEST_VMM_ERR_INVALID_LEN 1 #define SNP_GUEST_VMM_ERR_BUSY 2 diff --git a/init/initramfs.c b/init/initramfs.c index a099659348769..0c51079170992 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -54,7 +54,7 @@ static void panic_show_mem(const char *fmt, ...) { va_list args; - show_mem(0, NULL); + show_mem(); va_start(args, fmt); panic(fmt, args); va_end(args); diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index e20b90c361316..de3b681d1d13d 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -29,7 +29,7 @@ struct bpf_ringbuf { u64 mask; struct page **pages; int nr_pages; - spinlock_t spinlock ____cacheline_aligned_in_smp; + raw_spinlock_t spinlock ____cacheline_aligned_in_smp; /* For user-space producer ring buffers, an atomic_t busy bit is used * to synchronize access to the ring buffers in the kernel, rather than * the spinlock that is used for kernel-producer ring buffers.
This is @@ -173,7 +173,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node) if (!rb) return NULL; - spin_lock_init(&rb->spinlock); + raw_spin_lock_init(&rb->spinlock); atomic_set(&rb->busy, 0); init_waitqueue_head(&rb->waitq); init_irq_work(&rb->work, bpf_ringbuf_notify); @@ -421,10 +421,10 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) cons_pos = smp_load_acquire(&rb->consumer_pos); if (in_nmi()) { - if (!spin_trylock_irqsave(&rb->spinlock, flags)) + if (!raw_spin_trylock_irqsave(&rb->spinlock, flags)) return NULL; } else { - spin_lock_irqsave(&rb->spinlock, flags); + raw_spin_lock_irqsave(&rb->spinlock, flags); } pend_pos = rb->pending_pos; @@ -450,7 +450,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) */ if (new_prod_pos - cons_pos > rb->mask || new_prod_pos - pend_pos > rb->mask) { - spin_unlock_irqrestore(&rb->spinlock, flags); + raw_spin_unlock_irqrestore(&rb->spinlock, flags); return NULL; } @@ -462,7 +462,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) /* pairs with consumer's smp_load_acquire() */ smp_store_release(&rb->producer_pos, new_prod_pos); - spin_unlock_irqrestore(&rb->spinlock, flags); + raw_spin_unlock_irqrestore(&rb->spinlock, flags); return (void *)hdr + BPF_RINGBUF_HDR_SZ; } diff --git a/kernel/events/core.c b/kernel/events/core.c index 5fcd77eb305c4..ad14b0d192c26 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1255,8 +1255,9 @@ static void put_ctx(struct perf_event_context *ctx) * perf_event_context::mutex * perf_event::child_mutex; * perf_event_context::lock - * perf_event::mmap_mutex * mmap_lock + * perf_event::mmap_mutex + * perf_buffer::aux_mutex * perf_addr_filters_head::lock * * cpu_hotplug_lock @@ -6351,12 +6352,11 @@ static void perf_mmap_close(struct vm_area_struct *vma) event->pmu->event_unmapped(event, vma->vm_mm); /* - * rb->aux_mmap_count will always drop before rb->mmap_count and - * event->mmap_count, so it is ok to use event->mmap_mutex to - * serialize with perf_mmap here. + * The AUX buffer is strictly a sub-buffer, serialize using aux_mutex + * to avoid complications. 
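The fix splits one lock into two with a documented nesting order (mmap_mutex, then aux_mutex), matching the lock-ordering comment added to kernel/events/core.c above. A userspace model of the resulting discipline (the pthread names are illustrative, not the kernel API):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t aux_mutex = PTHREAD_MUTEX_INITIALIZER;

/* aux_mutex may be taken alone (as in perf_mmap_close()), but whenever
 * both are held, mmap_mutex is acquired first, never the reverse. */
static void map_aux_region(void)
{
	pthread_mutex_lock(&mmap_mutex);
	pthread_mutex_lock(&aux_mutex);
	puts("remap AUX pages");
	pthread_mutex_unlock(&aux_mutex);
	pthread_mutex_unlock(&mmap_mutex);
}

int main(void)
{
	map_aux_region();	/* build with -pthread */
	return 0;
}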
*/ if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff && - atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) { + atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &rb->aux_mutex)) { /* * Stop all AUX events that are writing to this buffer, * so that we can free its AUX pages and corresponding PMU @@ -6373,7 +6373,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) rb_free_aux(rb); WARN_ON_ONCE(refcount_read(&rb->aux_refcount)); - mutex_unlock(&event->mmap_mutex); + mutex_unlock(&rb->aux_mutex); } if (atomic_dec_and_test(&rb->mmap_count)) @@ -6461,6 +6461,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) struct perf_event *event = file->private_data; unsigned long user_locked, user_lock_limit; struct user_struct *user = current_user(); + struct mutex *aux_mutex = NULL; struct perf_buffer *rb = NULL; unsigned long locked, lock_limit; unsigned long vma_size; @@ -6507,6 +6508,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (!rb) goto aux_unlock; + aux_mutex = &rb->aux_mutex; + mutex_lock(aux_mutex); + aux_offset = READ_ONCE(rb->user_page->aux_offset); aux_size = READ_ONCE(rb->user_page->aux_size); @@ -6657,6 +6661,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) atomic_dec(&rb->mmap_count); } aux_unlock: + if (aux_mutex) + mutex_unlock(aux_mutex); mutex_unlock(&event->mmap_mutex); /* diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 5150d5f84c033..b59d748c0bc78 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -40,6 +40,7 @@ struct perf_buffer { struct user_struct *mmap_user; /* AUX area */ + RH_KABI_BROKEN_INSERT(struct mutex aux_mutex) long aux_head; unsigned int aux_nest; long aux_wakeup; /* last aux_watermark boundary crossed by aux_head */ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 4a260ceed9c73..690c53e015e57 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -332,6 +332,8 @@ ring_buffer_init(struct perf_buffer *rb, long watermark, int flags) */ if (!rb->nr_pages) rb->paused = 1; + + mutex_init(&rb->aux_mutex); } void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags) diff --git a/kernel/panic.c b/kernel/panic.c index 638b57d50a4f2..54493e35519a6 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -215,7 +215,7 @@ static void panic_print_sys_info(bool console_flush) show_state(); if (panic_print & PANIC_PRINT_MEM_INFO) - show_mem(0, NULL); + show_mem(); if (panic_print & PANIC_PRINT_TIMER_INFO) sysrq_timer_list_show(); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 32ce38e4439b7..ee1f4e546b6fa 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -172,6 +172,9 @@ static int gp_init_delay; module_param(gp_init_delay, int, 0444); static int gp_cleanup_delay; module_param(gp_cleanup_delay, int, 0444); +static int nohz_full_patience_delay; +module_param(nohz_full_patience_delay, int, 0444); +static int nohz_full_patience_delay_jiffies; // Add delay to rcu_read_unlock() for strict grace periods. 
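The nohz_full_patience_delay parameter declared just above is sanitized at boot in the rcu_bootup_announce_oddness() hunk further down; the clamping it applies reduces to this (a standalone sketch, not the kernel code):

#include <stdio.h>

#define MSEC_PER_SEC 1000L

static long clamp_patience(long ms)
{
	if (ms < 0)
		return 0;			/* negative: reset to zero */
	if (ms > 5 * MSEC_PER_SEC)
		return 5 * MSEC_PER_SEC;	/* cap at five seconds */
	return ms;
}

int main(void)
{
	printf("%ld %ld %ld\n",
	       clamp_patience(-7), clamp_patience(300), clamp_patience(60000));
	return 0;
}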
static int rcu_unlock_delay; @@ -3124,10 +3127,10 @@ schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp) if (delayed_work_pending(&krcp->monitor_work)) { delay_left = krcp->monitor_work.timer.expires - jiffies; if (delay < delay_left) - mod_delayed_work(system_wq, &krcp->monitor_work, delay); + mod_delayed_work(system_unbound_wq, &krcp->monitor_work, delay); return; } - queue_delayed_work(system_wq, &krcp->monitor_work, delay); + queue_delayed_work(system_unbound_wq, &krcp->monitor_work, delay); } static void @@ -3219,7 +3222,7 @@ static void kfree_rcu_monitor(struct work_struct *work) // be that the work is in the pending state when // channels have been detached following by each // other. - queue_rcu_work(system_wq, &krwp->rcu_work); + queue_rcu_work(system_unbound_wq, &krwp->rcu_work); } } @@ -3289,7 +3292,7 @@ run_page_cache_worker(struct kfree_rcu_cpu *krcp) if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING && !atomic_xchg(&krcp->work_in_progress, 1)) { if (atomic_read(&krcp->backoff_page_cache_fill)) { - queue_delayed_work(system_wq, + queue_delayed_work(system_unbound_wq, &krcp->page_cache_work, msecs_to_jiffies(rcu_delay_page_cache_fill_msec)); } else { @@ -3902,11 +3905,15 @@ static int rcu_pending(int user) return 1; /* Is this a nohz_full CPU in userspace or idle? (Ignore RCU if so.) */ - if ((user || rcu_is_cpu_rrupt_from_idle()) && rcu_nohz_full_cpu()) + gp_in_progress = rcu_gp_in_progress(); + if ((user || rcu_is_cpu_rrupt_from_idle() || + (gp_in_progress && + time_before(jiffies, READ_ONCE(rcu_state.gp_start) + + nohz_full_patience_delay_jiffies))) && + rcu_nohz_full_cpu()) return 0; /* Is the RCU core waiting for a quiescent state from this CPU? */ - gp_in_progress = rcu_gp_in_progress(); if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm && gp_in_progress) return 1; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 41021080ad258..de239fbf7fb88 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -93,6 +93,16 @@ static void __init rcu_bootup_announce_oddness(void) pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay); if (gp_cleanup_delay) pr_info("\tRCU debug GP cleanup slowdown %d jiffies.\n", gp_cleanup_delay); + if (nohz_full_patience_delay < 0) { + pr_info("\tRCU NOCB CPU patience negative (%d), resetting to zero.\n", nohz_full_patience_delay); + nohz_full_patience_delay = 0; + } else if (nohz_full_patience_delay > 5 * MSEC_PER_SEC) { + pr_info("\tRCU NOCB CPU patience too large (%d), resetting to %ld.\n", nohz_full_patience_delay, 5 * MSEC_PER_SEC); + nohz_full_patience_delay = 5 * MSEC_PER_SEC; + } else if (nohz_full_patience_delay) { + pr_info("\tRCU NOCB CPU patience set to %d milliseconds.\n", nohz_full_patience_delay); + } + nohz_full_patience_delay_jiffies = msecs_to_jiffies(nohz_full_patience_delay); if (!use_softirq) pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n"); if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG)) diff --git a/kernel/rh_messages.h b/kernel/rh_messages.h index d02c47d8130a4..4b5c727c07ffc 100644 --- a/kernel/rh_messages.h +++ b/kernel/rh_messages.h @@ -79,7 +79,6 @@ static const char *rh_unmaintained_drivers[] = { "e1000", "ebtables", "hdlc_fr", - "hfi1", "hpsa", "ip6_tables", "ip_set", @@ -131,7 +130,6 @@ static const char *rh_unmaintained_init_fns[] = { "siw_init_module", "usnic_ib_init", "pvrdma_init", - "hfi1_mod_init", "nfp_main_init", 0 /* Terminating entry */ }; diff --git a/mm/internal.h b/mm/internal.h index 7a97bf413cb82..14483a717f491 100644 --- a/mm/internal.h +++ 
b/mm/internal.h @@ -61,6 +61,12 @@ void page_writeback_init(void); #define COMPOUND_MAPPED 0x800000 #define FOLIO_PAGES_MAPPED (COMPOUND_MAPPED - 1) +/* + * Flags passed to __show_mem() and show_free_areas() to suppress output in + * various contexts. + */ +#define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */ + /* * How many individual pages have an elevated _mapcount. Excludes * the folio's entire_mapcount. diff --git a/mm/nommu.c b/mm/nommu.c index f670d9979a261..bff51d8ec66e0 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -990,7 +990,7 @@ static int do_mmap_private(struct vm_area_struct *vma, enomem: pr_err("Allocation of length %lu from process %d (%s) failed\n", len, current->pid, current->comm); - show_free_areas(0, NULL); + show_mem(); return -ENOMEM; } @@ -1223,20 +1223,20 @@ unsigned long do_mmap(struct file *file, kmem_cache_free(vm_region_jar, region); pr_warn("Allocation of vma for %lu byte allocation from process %d failed\n", len, current->pid); - show_free_areas(0, NULL); + show_mem(); return -ENOMEM; error_getting_region: pr_warn("Allocation of vm region for %lu byte allocation from process %d failed\n", len, current->pid); - show_free_areas(0, NULL); + show_mem(); return -ENOMEM; error_vma_iter_prealloc: kmem_cache_free(vm_region_jar, region); vm_area_free(vma); pr_warn("Allocation of vma tree for process %d failed\n", current->pid); - show_free_areas(0, NULL); + show_mem(); return -ENOMEM; } diff --git a/mm/show_mem.c b/mm/show_mem.c index 01f8e99058173..09c7d036d49ec 100644 --- a/mm/show_mem.c +++ b/mm/show_mem.c @@ -186,7 +186,7 @@ static bool node_has_managed_zones(pg_data_t *pgdat, int max_zone_idx) * SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's * cpuset. */ -void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_idx) +static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_idx) { unsigned long free_pcp = 0; int cpu, nid; @@ -406,7 +406,7 @@ void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx) struct zone *zone; printk("Mem-Info:\n"); - __show_free_areas(filter, nodemask, max_zone_idx); + show_free_areas(filter, nodemask, max_zone_idx); for_each_populated_zone(zone) { diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 4f28445dcf3fc..94a4ce8075d41 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -203,6 +203,38 @@ static int mfill_atomic_pte_copy(pmd_t *dst_pmd, goto out; } +static int mfill_atomic_pte_zeroed_folio(pmd_t *dst_pmd, + struct vm_area_struct *dst_vma, + unsigned long dst_addr) +{ + struct folio *folio; + int ret = -ENOMEM; + + folio = vma_alloc_zeroed_movable_folio(dst_vma, dst_addr); + if (!folio) + return ret; + + if (mem_cgroup_charge(folio, dst_vma->vm_mm, GFP_KERNEL)) + goto out_put; + + /* + * The memory barrier inside __folio_mark_uptodate makes sure that + * zeroing out the folio become visible before mapping the page + * using set_pte_at(). See do_anonymous_page(). 
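The barrier described in this comment is the usual initialize-then-publish pattern. A userspace analog using C11 release/acquire ordering (illustrative only; the kernel relies on its own barrier primitives inside __folio_mark_uptodate() and set_pte_at()):

#include <stdatomic.h>
#include <stdio.h>

static int page[16];
static atomic_int mapped;

static void producer(void)
{
	for (int i = 0; i < 16; i++)
		page[i] = 0;	/* zero the folio */
	/* publish, like set_pte_at() after __folio_mark_uptodate() */
	atomic_store_explicit(&mapped, 1, memory_order_release);
}

static int consumer(void)
{
	if (atomic_load_explicit(&mapped, memory_order_acquire))
		return page[0];	/* guaranteed to observe the zeroes */
	return -1;		/* not mapped yet */
}

int main(void)
{
	producer();
	printf("%d\n", consumer());
	return 0;
}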
+ */ + __folio_mark_uptodate(folio); + + ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr, + &folio->page, true, 0); + if (ret) + goto out_put; + + return 0; +out_put: + folio_put(folio); + return ret; +} + static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd, struct vm_area_struct *dst_vma, unsigned long dst_addr) @@ -213,6 +245,9 @@ static int mfill_atomic_pte_zeropage(pmd_t *dst_pmd, pgoff_t offset, max_off; struct inode *inode; + if (mm_forbids_zeropage(dst_vma->vm_mm)) + return mfill_atomic_pte_zeroed_folio(dst_pmd, dst_vma, dst_addr); + _dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr), dst_vma->vm_page_prot)); ret = -EAGAIN; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 3b52f56bbdde0..28779369bbf82 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -791,21 +791,31 @@ static bool reqsk_queue_unlink(struct request_sock *req) found = __sk_nulls_del_node_init_rcu(req_to_sk(req)); spin_unlock(lock); } - if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) - reqsk_put(req); + return found; } -bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) +static bool __inet_csk_reqsk_queue_drop(struct sock *sk, + struct request_sock *req, + bool from_timer) { bool unlinked = reqsk_queue_unlink(req); + if (!from_timer && timer_delete_sync(&req->rsk_timer)) + reqsk_put(req); + if (unlinked) { reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); reqsk_put(req); } + return unlinked; } + +bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) +{ + return __inet_csk_reqsk_queue_drop(sk, req, false); +} EXPORT_SYMBOL(inet_csk_reqsk_queue_drop); void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req) @@ -901,7 +911,7 @@ static void reqsk_timer_handler(struct timer_list *t) if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) { /* delete timer */ - inet_csk_reqsk_queue_drop(sk_listener, nreq); + __inet_csk_reqsk_queue_drop(sk_listener, nreq, true); goto no_ownership; } @@ -927,7 +937,8 @@ static void reqsk_timer_handler(struct timer_list *t) } drop: - inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq); + __inet_csk_reqsk_queue_drop(sk_listener, oreq, true); + reqsk_put(oreq); } static void reqsk_queue_hash_req(struct request_sock *req, diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 498a0c35b7bb2..815b1df0b2d19 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -335,8 +335,8 @@ static void iucv_sever_path(struct sock *sk, int with_user_data) struct iucv_sock *iucv = iucv_sk(sk); struct iucv_path *path = iucv->path; - if (iucv->path) { - iucv->path = NULL; + /* Whoever resets the path pointer, must sever and free it. 
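The xchg() in the hunk below makes exactly one caller the owner of the old path pointer, which is what closes the double-free race. A minimal userspace equivalent (names are illustrative):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static _Atomic(char *) path;

static void sever_path(void)
{
	/* Like xchg(&iucv->path, NULL): only the caller that observes the
	 * non-NULL value tears the path down and frees it. */
	char *p = atomic_exchange(&path, NULL);

	if (p) {
		printf("severing %s\n", p);
		free(p);
	}
}

int main(void)
{
	path = strdup("iucv-path");
	sever_path();	/* frees the path */
	sever_path();	/* no-op: no double free */
	return 0;
}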
*/ + if (xchg(&iucv->path, NULL)) { if (with_user_data) { low_nmcpy(user_data, iucv->src_name); high_nmcpy(user_data, iucv->dst_name); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index bc1ce6b89e7fa..81839b93286f5 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2058,7 +2058,8 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) slow = lock_sock_fast(ssk); WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf); tcp_sk(ssk)->window_clamp = window_clamp; - tcp_cleanup_rbuf(ssk, 1); + if (tcp_can_send_ack(ssk)) + tcp_cleanup_rbuf(ssk, 1); unlock_sock_fast(ssk, slow); } } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c82ad7ab90a71..ee5e246c4a0a5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -374,12 +374,17 @@ static void nf_tables_unregister_hook(struct net *net, static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) { struct nftables_pernet *nft_net = nft_pernet(net); + struct nft_trans_set *trans_set; switch (trans->msg_type) { case NFT_MSG_NEWSET: + trans_set = (struct nft_trans_set *)trans->data; + if (!nft_trans_set_update(trans) && nft_set_is_anonymous(nft_trans_set(trans))) list_add_tail(&trans->binding_list, &nft_net->binding_list); + + list_add_tail(&trans_set->list_trans_newset, &nft_net->commit_set_list); break; case NFT_MSG_NEWCHAIN: if (!nft_trans_chain_update(trans) && @@ -562,12 +567,16 @@ static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, struct nft_set *set, const struct nft_set_desc *desc) { + struct nft_trans_set *trans_set; struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set)); if (trans == NULL) return -ENOMEM; + trans_set = (struct nft_trans_set *)trans->data; + INIT_LIST_HEAD(&trans_set->list_trans_newset); + if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) { nft_trans_set_id(trans) = ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); @@ -4204,17 +4213,16 @@ static struct nft_set *nft_set_lookup_byid(const struct net *net, { struct nftables_pernet *nft_net = nft_pernet(net); u32 id = ntohl(nla_get_be32(nla)); - struct nft_trans *trans; + struct nft_trans_set *trans; - list_for_each_entry(trans, &nft_net->commit_list, list) { - if (trans->msg_type == NFT_MSG_NEWSET) { - struct nft_set *set = nft_trans_set(trans); + /* its likely the id we need is at the tail, not at start */ + list_for_each_entry_reverse(trans, &nft_net->commit_set_list, list_trans_newset) { + struct nft_set *set = trans->set; - if (id == nft_trans_set_id(trans) && - set->table == table && - nft_active_genmask(set, genmask)) - return set; - } + if (id == trans->set_id && + set->table == table && + nft_active_genmask(set, genmask)) + return set; } return ERR_PTR(-ENOENT); } @@ -9915,6 +9923,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_NEWSET: + list_del(&((struct nft_trans_set *)trans->data)->list_trans_newset); if (nft_trans_set_update(trans)) { struct nft_set *set = nft_trans_set(trans); @@ -10196,6 +10205,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: + list_del(&((struct nft_trans_set *)trans->data)->list_trans_newset); if (nft_trans_set_update(trans)) { nft_trans_destroy(trans); break; @@ -10291,6 +10301,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } } + 
WARN_ON_ONCE(!list_empty(&nft_net->commit_set_list)); + nft_set_abort_update(&set_update_list); synchronize_rcu(); @@ -10919,8 +10931,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, gc_seq = nft_gc_seq_begin(nft_net); - if (!list_empty(&nf_tables_destroy_list)) - nf_tables_trans_destroy_flush_work(); + nf_tables_trans_destroy_flush_work(); again: list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table) && @@ -10962,6 +10973,7 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&nft_net->tables); INIT_LIST_HEAD(&nft_net->commit_list); + INIT_LIST_HEAD(&nft_net->commit_set_list); INIT_LIST_HEAD(&nft_net->binding_list); INIT_LIST_HEAD(&nft_net->module_list); INIT_LIST_HEAD(&nft_net->notify_list); @@ -10992,6 +11004,7 @@ static void __net_exit nf_tables_exit_net(struct net *net) gc_seq = nft_gc_seq_begin(nft_net); WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + WARN_ON_ONCE(!list_empty(&nft_net->commit_set_list)); if (!list_empty(&nft_net->module_list)) nf_tables_module_autoload_cleanup(net); diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 83aae2bcd4950..47736abe01350 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -432,7 +432,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, res_map = scratch->map + (map_index ? m->bsize_max : 0); fill_map = scratch->map + (map_index ? 0 : m->bsize_max); - memset(res_map, 0xff, m->bsize_max * sizeof(*res_map)); + pipapo_resmap_init(m, res_map); nft_pipapo_for_each_field(f, i, m) { bool last = i == m->field_count - 1; @@ -540,7 +540,7 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, goto out; } - memset(res_map, 0xff, m->bsize_max * sizeof(*res_map)); + pipapo_resmap_init(m, res_map); nft_pipapo_for_each_field(f, i, m) { bool last = i == m->field_count - 1; diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index bd45d14a75647..943e15c12327a 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -287,4 +287,25 @@ static u64 pipapo_estimate_size(const struct nft_set_desc *desc) return size; } +/** + * pipapo_resmap_init() - Initialise result map before first use + * @m: Matching data, including mapping table + * @res_map: Result map + * + * Initialize all bits covered by the first field to one, so that after + * the first step, only the matching bits of the first bit group remain. + * + * If other fields have a large bitmap, set remainder of res_map to 0. 
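Why zeroing the remainder matters: with fields of different bitmap sizes, stale one-bits beyond the first field's words could yield false lookup matches, which is the initial-map-fill bug this series fixes. A standalone sketch of the fill (array sizes are illustrative):

#include <limits.h>
#include <stdio.h>

static void resmap_init(unsigned long *map, size_t first_bsize, size_t bsize_max)
{
	size_t i;

	for (i = 0; i < first_bsize; i++)
		map[i] = ULONG_MAX;	/* candidate bits for the first field */
	for (i = first_bsize; i < bsize_max; i++)
		map[i] = 0;		/* previously left stale */
}

int main(void)
{
	unsigned long map[4] = { 0xdead, 0xdead, 0xdead, 0xdead };

	resmap_init(map, 1, 4);
	for (int i = 0; i < 4; i++)
		printf("map[%d] = %lx\n", i, map[i]);
	return 0;
}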
+ */ +static inline void pipapo_resmap_init(const struct nft_pipapo_match *m, unsigned long *res_map) +{ + const struct nft_pipapo_field *f = m->f; + int i; + + for (i = 0; i < f->bsize; i++) + res_map[i] = ULONG_MAX; + + for (i = f->bsize; i < m->bsize_max; i++) + res_map[i] = 0ul; +} #endif /* _NFT_SET_PIPAPO_H */ diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index ef24c41482a32..dfae90cd34939 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -1036,6 +1036,7 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill, /** * nft_pipapo_avx2_lookup_slow() - Fallback function for uncommon field sizes + * @mdata: Matching data, including mapping table * @map: Previous match result, used as initial bitmap * @fill: Destination bitmap to be filled with current match result * @f: Field, containing lookup and mapping tables @@ -1051,7 +1052,8 @@ static int nft_pipapo_avx2_lookup_8b_16(unsigned long *map, unsigned long *fill, * Return: -1 on no match, rule index of match if @last, otherwise first long * word index to be checked next (i.e. first filled word). */ -static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill, +static int nft_pipapo_avx2_lookup_slow(const struct nft_pipapo_match *mdata, + unsigned long *map, unsigned long *fill, const struct nft_pipapo_field *f, int offset, const u8 *pkt, bool first, bool last) @@ -1062,7 +1064,7 @@ static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill, lt += offset * NFT_PIPAPO_LONGS_PER_M256; if (first) - memset(map, 0xff, bsize * sizeof(*map)); + pipapo_resmap_init(mdata, map); for (i = offset; i < bsize; i++) { if (f->bb == 8) @@ -1139,8 +1141,14 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, bool map_index; int i, ret = 0; - if (unlikely(!irq_fpu_usable())) - return nft_pipapo_lookup(net, set, key, ext); + local_bh_disable(); + + if (unlikely(!irq_fpu_usable())) { + bool fallback_res = nft_pipapo_lookup(net, set, key, ext); + + local_bh_enable(); + return fallback_res; + } m = rcu_dereference(priv->match); @@ -1155,6 +1163,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, scratch = *raw_cpu_ptr(m->scratch); if (unlikely(!scratch)) { kernel_fpu_end(); + local_bh_enable(); return false; } @@ -1188,7 +1197,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, } else if (f->groups == 16) { NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16); } else { - ret = nft_pipapo_avx2_lookup_slow(res, fill, f, + ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f, ret, rp, first, last); } @@ -1204,7 +1213,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, } else if (f->groups == 32) { NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32); } else { - ret = nft_pipapo_avx2_lookup_slow(res, fill, f, + ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f, ret, rp, first, last); } @@ -1235,6 +1244,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, if (i % 2) scratch->map_index = !map_index; kernel_fpu_end(); + local_bh_enable(); return ret >= 0; } diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 4199cc2a5ebc9..59493dca6fdec 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -9,7 +9,8 @@ struct nft_socket { enum nft_socket_keys key:8; - u8 level; + u8 level; /* cgroupv2 level to extract */ + u8 level_user; /* cgroupv2 level provided by userspace */ union 
{ u8 dreg; }; @@ -52,6 +53,28 @@ nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo memcpy(dest, &cgid, sizeof(u64)); return true; } + +/* process context only, uses current->nsproxy. */ +static noinline int nft_socket_cgroup_subtree_level(void) +{ + struct cgroup *cgrp = cgroup_get_from_path("/"); + int level; + + if (IS_ERR(cgrp)) + return PTR_ERR(cgrp); + + level = cgrp->level; + + cgroup_put(cgrp); + + if (WARN_ON_ONCE(level > 255)) + return -ERANGE; + + if (WARN_ON_ONCE(level < 0)) + return -EINVAL; + + return level; +} #endif static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt) @@ -109,13 +132,13 @@ static void nft_socket_eval(const struct nft_expr *expr, *dest = sk->sk_mark; } else { regs->verdict.code = NFT_BREAK; - return; + goto out_put_sk; } break; case NFT_SOCKET_WILDCARD: if (!sk_fullsock(sk)) { regs->verdict.code = NFT_BREAK; - return; + goto out_put_sk; } nft_socket_wildcard(pkt, regs, sk, dest); break; @@ -123,7 +146,7 @@ static void nft_socket_eval(const struct nft_expr *expr, case NFT_SOCKET_CGROUPV2: if (!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) { regs->verdict.code = NFT_BREAK; - return; + goto out_put_sk; } break; #endif @@ -132,6 +155,7 @@ static void nft_socket_eval(const struct nft_expr *expr, regs->verdict.code = NFT_BREAK; } +out_put_sk: if (sk != skb->sk) sock_gen_put(sk); } @@ -172,9 +196,10 @@ static int nft_socket_init(const struct nft_ctx *ctx, case NFT_SOCKET_MARK: len = sizeof(u32); break; -#ifdef CONFIG_CGROUPS +#ifdef CONFIG_SOCK_CGROUP_DATA case NFT_SOCKET_CGROUPV2: { unsigned int level; + int err; if (!tb[NFTA_SOCKET_LEVEL]) return -EINVAL; @@ -183,6 +208,17 @@ static int nft_socket_init(const struct nft_ctx *ctx, if (level > 255) return -EOPNOTSUPP; + err = nft_socket_cgroup_subtree_level(); + if (err < 0) + return err; + + priv->level_user = level; + + level += err; + /* Implies a giant cgroup tree */ + if (WARN_ON_ONCE(level > 255)) + return -EOPNOTSUPP; + priv->level = level; len = sizeof(u64); break; @@ -206,7 +242,7 @@ static int nft_socket_dump(struct sk_buff *skb, if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg)) return -1; if (priv->key == NFT_SOCKET_CGROUPV2 && - nla_put_u32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level))) + nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level_user))) return -1; return 0; } diff --git a/redhat/kernel.changelog-9.5 b/redhat/kernel.changelog-9.5 index 86cf28369137d..35e177b9ebaa9 100644 --- a/redhat/kernel.changelog-9.5 +++ b/redhat/kernel.changelog-9.5 @@ -1,3 +1,100 @@ +* Wed Jan 15 2025 Chao YE [5.14.0-503.22.1.el9_5] +- [s390] zcore: WRITE is "data source", not destination... (CKI Backport Bot) [RHEL-63078] +- arm64/sve: Discard stale CPU state when handling SVE traps (Mark Salter) [RHEL-72218] {CVE-2024-50275} +- bpf: Use raw_spinlock_t in ringbuf (Luis Claudio R. 
diff --git a/redhat/kernel.changelog-9.5 b/redhat/kernel.changelog-9.5
index 86cf28369137d..35e177b9ebaa9 100644
--- a/redhat/kernel.changelog-9.5
+++ b/redhat/kernel.changelog-9.5
@@ -1,3 +1,100 @@
+* Wed Jan 15 2025 Chao YE [5.14.0-503.22.1.el9_5]
+- [s390] zcore: WRITE is "data source", not destination... (CKI Backport Bot) [RHEL-63078]
+- arm64/sve: Discard stale CPU state when handling SVE traps (Mark Salter) [RHEL-72218] {CVE-2024-50275}
+- bpf: Use raw_spinlock_t in ringbuf (Luis Claudio R. Goncalves) [RHEL-68992 RHEL-20608]
+- rh_messages.h: un-unmaintain hfi1 (CKI Backport Bot) [RHEL-71322]
+- scsi: lpfc: Validate hdwq pointers before dereferencing in reset/errata paths (Dick Kennedy) [RHEL-66055 RHEL-53595]
+- scsi: lpfc: Handle mailbox timeouts in lpfc_get_sfp_info (Dick Kennedy) [RHEL-66055 RHEL-53595]
+- ASoC: SOF: ipc4-topology: Preserve the DMA Link ID for ChainDMA on unprepare (Jaroslav Kysela) [RHEL-62030]
+- ASoC: SOF: ipc4-topology: Only handle dai_config with HW_PARAMS for ChainDMA (Jaroslav Kysela) [RHEL-62030]
+- ASoC: SOF: ipc4-topology: Use single token list for the copiers (Jaroslav Kysela) [RHEL-62030]
+- ASoC: SOF: ipc4-topology: Use correct queue_id for requesting input pin format (Jaroslav Kysela) [RHEL-62030]
+- ASoC: SOF: ipc4-topology: Adjust the params based on DAI formats (Jaroslav Kysela) [RHEL-62030]
+- ASoC: SOF: ipc4-topology: Improve readability of sof_ipc4_prepare_dai_copier() (Jaroslav Kysela) [RHEL-62030]
+- ASoC: SOF: ipc4-topology/pcm: Rename sof_ipc4_copier_is_single_format() (Jaroslav Kysela) [RHEL-62030]
+- ASoC: SOF: ipc4-topology: Print out the channel count in sof_ipc4_dbg_audio_format (Jaroslav Kysela) [RHEL-62030]
+- ASoC: SOF: ipc4-topology: support tdm slot number query (Jaroslav Kysela) [RHEL-62030]
+- ASoC: SOF: ipc3-topology: support tdm slot number query (Jaroslav Kysela) [RHEL-62030]
+- ASoC: SOF: sof-audio: add sof_dai_get_tdm_slots function (Jaroslav Kysela) [RHEL-62030]
+- ASoC: SOF: sof-audio: rename dai clock setting query function (Jaroslav Kysela) [RHEL-62030]
+- ASoC: SOF: ipc4-topology: Add support for NHLT with 16-bit only DMIC blob (Jaroslav Kysela) [RHEL-62030]
+- ASoC: SOF: ipc4-topology: Correct DAI copier config and NHLT blob request (Jaroslav Kysela) [RHEL-62030]
+- ASoC: SOF: ipc4-topology: Allow selective update in sof_ipc4_update_hw_params (Jaroslav Kysela) [RHEL-62030]
+- ASoC: SOF: ipc4-topology: remove shadowed variable (Jaroslav Kysela) [RHEL-62030]
+- Revert "ixgbe: Manual AN-37 for troublesome link partners for X550 SFI" (Ivan Vecera) [RHEL-65624]
+- KVM: SVM: Propagate error from snp_guest_req_init() to userspace (Bandan Das) [RHEL-68361 RHEL-65840]
+- KVM: SEV: Provide support for SNP_EXTENDED_GUEST_REQUEST NAE event (Bandan Das) [RHEL-68361 RHEL-65840]
+- x86/sev: Move sev_guest.h into common SEV header (Bandan Das) [RHEL-68361 RHEL-65840]
+- KVM: SEV: Provide support for SNP_GUEST_REQUEST NAE event (Bandan Das) [RHEL-68361 RHEL-65840]
+- i40e: fix race condition by adding filter's intermediate sync state (CKI Backport Bot) [RHEL-69809] {CVE-2024-53088}
+- ice: fix truesize operations for PAGE_SIZE >= 8192 (CKI Backport Bot) [RHEL-70660 RHEL-37905]
+- ice: fix ICE_LAST_OFFSET formula (CKI Backport Bot) [RHEL-70660 RHEL-37905]
+- ice: fix page reuse when PAGE_SIZE is over 8k (CKI Backport Bot) [RHEL-70660 RHEL-37905]
+- nvme-fabrics: handle zero MAXCMD without closing the connection (Maurizio Lombardi) [RHEL-72970]
+- selftests: netfilter: add test case for recent mismatch bug (Florian Westphal) [RHEL-71357 RHEL-60554]
+- netfilter: nf_tables: unconditionally flush pending work before notifier (Florian Westphal) [RHEL-71357 RHEL-60554]
+- netfilter: nft_set_pipapo_avx2: disable softinterrupts (Florian Westphal) [RHEL-71357 RHEL-60554]
+- netfilter: nf_set_pipapo: fix initial map fill (Florian Westphal) [RHEL-71357 RHEL-60554]
+- netfilter: nf_tables: store new sets in dedicated list (Florian Westphal) [RHEL-71357 RHEL-60554]
+- netfilter: nft_socket: Fix a NULL vs IS_ERR() bug in nft_socket_cgroup_subtree_level() (Florian Westphal) [RHEL-71357 RHEL-60554]
+- netfilter: nft_socket: make cgroupsv2 matching work with namespaces (Florian Westphal) [RHEL-71357 RHEL-60554]
+- netfilter: nft_socket: fix sk refcount leaks (Florian Westphal) [RHEL-71357 RHEL-60554]
+- tcp: Fix use-after-free of nreq in reqsk_timer_handler(). (Guillaume Nault) [RHEL-70541 RHEL-70449]
+- tcp/dccp: Don't use timer_pending() in reqsk_queue_unlink(). (Guillaume Nault) [RHEL-66329 RHEL-66328] {CVE-2024-50154}
+- rcu: Use system_unbound_wq to avoid disturbing isolated CPUs (Waiman Long) [RHEL-61329 RHEL-50220]
+- scsi: storvsc: Do not flag MAINTENANCE_IN return of SRB_STATUS_DATA_OVERRUN as an error (Cathy Avery) [RHEL-71393 RHEL-9848]
+- scsi: storvsc: Handle additional SRB status values (Cathy Avery) [RHEL-71393 RHEL-9848]
+Resolves: RHEL-61329, RHEL-62030, RHEL-63078, RHEL-65624, RHEL-66055, RHEL-66329, RHEL-68361, RHEL-68992, RHEL-69809, RHEL-70541, RHEL-70660, RHEL-71322, RHEL-71357, RHEL-71393, RHEL-72218, RHEL-72970
+
+* Thu Dec 19 2024 Lucas Zampieri [5.14.0-503.21.1.el9_5]
+- mlxsw: spectrum_ipip: Fix memory leak when changing remote IPv6 address (CKI Backport Bot) [RHEL-66899] {CVE-2024-50252}
+- CVE-2024-53122 mptcp: cope racing subflow creation in mptcp_rcv_space_adjust (Patrick Talbert) [RHEL-70083 RHEL-69670] {CVE-2024-53122}
+- mm: make show_free_areas() static (Aristeu Rozanski) [RHEL-66998 RHEL-27743]
+- mm: remove arguments of show_mem() (Aristeu Rozanski) [RHEL-66998 RHEL-27743]
+- KVM: s390: Change virtual to physical address access in diag 0x258 handler (Thomas Huth) [RHEL-67922 RHEL-65229]
+- KVM: s390: gaccess: Check if guest address is in memslot (Thomas Huth) [RHEL-67922 RHEL-65229]
+- KVM: s390: Fix SORTL and DFLTCC instruction format error in __insn32_query (Thomas Huth) [RHEL-67922 RHEL-65229]
+- s390/uv: Panic for set and remove shared access UVC errors (Thomas Huth) [RHEL-67922 RHEL-65229]
+- KVM: s390: remove useless include (Thomas Huth) [RHEL-67922 RHEL-65229]
+- s390/mm: Re-enable the shared zeropage for !PV and !skeys KVM guests (Thomas Huth) [RHEL-67922 RHEL-65229]
+- mm/userfaultfd: Do not place zeropages when zeropages are disallowed (Thomas Huth) [RHEL-67922 RHEL-65229]
+- s390: allow pte_offset_map_lock() to fail (Thomas Huth) [RHEL-67922 RHEL-54248]
+- KVM: s390: vsie: Use virt_to_phys for crypto control block (Thomas Huth) [RHEL-67922 RHEL-65229]
+- KVM: s390: vsie: Use virt_to_phys for facility control block (Thomas Huth) [RHEL-67922 RHEL-65229]
+- gfs2: Prevent inode creation race (Andreas Gruenbacher) [RHEL-68137 RHEL-68102]
+- gfs2: Only defer deletes when we have an iopen glock (Andreas Gruenbacher) [RHEL-68137 RHEL-68102]
+- gfs2: Randomize GLF_VERIFY_DELETE work delay (Andreas Gruenbacher) [RHEL-62105 RHEL-60945]
+- gfs2: Use mod_delayed_work in gfs2_queue_try_to_evict (Andreas Gruenbacher) [RHEL-62105 RHEL-60945]
+- gfs2: Update to the evict / remote delete documentation (Andreas Gruenbacher) [RHEL-62105 RHEL-60945]
+- gfs2: Call gfs2_queue_verify_delete from gfs2_evict_inode (Andreas Gruenbacher) [RHEL-62105 RHEL-60945]
+- gfs2: Clean up delete work processing (Andreas Gruenbacher) [RHEL-62105 RHEL-60945]
+- gfs2: Minor delete_work_func cleanup (Andreas Gruenbacher) [RHEL-62105 RHEL-60945]
+- gfs2: Return enum evict_behavior from gfs2_upgrade_iopen_glock (Andreas Gruenbacher) [RHEL-62105 RHEL-60945]
+- gfs2: Rename dinode_demise to evict_behavior (Andreas Gruenbacher) [RHEL-62105 RHEL-60945]
+- gfs2: Rename GIF_{DEFERRED -> DEFER}_DELETE (Andreas Gruenbacher) [RHEL-62105 RHEL-60945]
+- gfs2: Faster gfs2_upgrade_iopen_glock wakeups (Andreas Gruenbacher) [RHEL-62105 RHEL-60945]
+- gfs2: Fix unlinked inode cleanup (Andreas Gruenbacher) [RHEL-62105 RHEL-60945]
+- gfs2: Allow immediate GLF_VERIFY_DELETE work (Andreas Gruenbacher) [RHEL-62105 RHEL-60945]
+- gfs2: Initialize gl_no_formal_ino earlier (Andreas Gruenbacher) [RHEL-62105 RHEL-60945]
+- gfs2: Rename GLF_VERIFY_EVICT to GLF_VERIFY_DELETE (Andreas Gruenbacher) [RHEL-62105 RHEL-60945]
+- gfs2: make timeout values more explicit (Wolfram Sang) [RHEL-62105 RHEL-60945]
+- gfs2: Simplify function gfs2_upgrade_iopen_glock (Andreas Gruenbacher) [RHEL-62105 RHEL-60945]
+- gfs2: Rename SDF_DEACTIVATING to SDF_KILL (Andreas Gruenbacher) [RHEL-62105 RHEL-60945]
+- smb: client: fix use-after-free of signing key (Jay Shin) [RHEL-69306 RHEL-66206]
+- net/iucv: fix use after free in iucv_sock_close() (Mete Durlu) [RHEL-60300 RHEL-53992]
+- KVM: arm64: Ensure vgic_ready() is ordered against MMIO registration (CKI Backport Bot) [RHEL-70294]
+Resolves: RHEL-60300, RHEL-62105, RHEL-66899, RHEL-66998, RHEL-67922, RHEL-68137, RHEL-69306, RHEL-70083, RHEL-70294
+
+* Thu Dec 12 2024 Lucas Zampieri [5.14.0-503.20.1.el9_5]
+- bnxt_en: Add support for user configured RSS key (Michal Schmidt) [RHEL-68699 RHEL-54645]
+- bnxt_en: Add function to calculate Toeplitz hash (Michal Schmidt) [RHEL-68699 RHEL-54645]
+- kvm: Note an RCU quiescent state on guest exit (Leonardo Bras) [RHEL-65734 RHEL-20288]
+- rcu: Add rcutree.nohz_full_patience_delay to reduce nohz_full OS jitter (Leonardo Bras) [RHEL-65734 RHEL-20288]
+- context_tracking: Fix KCSAN noinstr violation (Leonardo Bras) [RHEL-65734 RHEL-20288]
+- perf/aux: Fix AUX buffer serialization (Michael Petlan) [RHEL-67495] {CVE-2024-46713}
+- RDMA/bnxt_re: Fix a bug while setting up Level-2 PBL pages (Mohammad Heib) [RHEL-66669 RHEL-52759] {CVE-2024-50208}
+Resolves: RHEL-65734, RHEL-66669, RHEL-67495, RHEL-68699
+
 * Fri Dec 06 2024 Lucas Zampieri [5.14.0-503.19.1.el9_5]
 - xfrm: validate new SA's prefixlen using SA family when sel.family is unset (Sabrina Dubroca) [RHEL-66462 RHEL-66461] {CVE-2024-50142}
 - xfrm: fix one more kernel-infoleak in algo dumping (CKI Backport Bot) [RHEL-65960] {CVE-2024-50110}
diff --git a/sound/soc/sof/ipc3-topology.c b/sound/soc/sof/ipc3-topology.c
index ab7f46a162da7..3d1a063a69022 100644
--- a/sound/soc/sof/ipc3-topology.c
+++ b/sound/soc/sof/ipc3-topology.c
@@ -2500,7 +2500,7 @@ static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verif
 	return 0;
 }
 
-static int sof_ipc3_dai_get_clk(struct snd_sof_dev *sdev, struct snd_sof_dai *dai, int clk_type)
+static int sof_ipc3_dai_get_param(struct snd_sof_dev *sdev, struct snd_sof_dai *dai, int param_type)
 {
 	struct sof_dai_private_data *private = dai->private;
 
@@ -2509,15 +2509,17 @@ static int sof_ipc3_dai_get_param(struct snd_sof_dev *sdev, struct snd_sof_dai *da
 
 	switch (private->dai_config->type) {
 	case SOF_DAI_INTEL_SSP:
-		switch (clk_type) {
-		case SOF_DAI_CLK_INTEL_SSP_MCLK:
+		switch (param_type) {
+		case SOF_DAI_PARAM_INTEL_SSP_MCLK:
 			return private->dai_config->ssp.mclk_rate;
-		case SOF_DAI_CLK_INTEL_SSP_BCLK:
+		case SOF_DAI_PARAM_INTEL_SSP_BCLK:
 			return private->dai_config->ssp.bclk_rate;
+		case SOF_DAI_PARAM_INTEL_SSP_TDM_SLOTS:
+			return private->dai_config->ssp.tdm_slots;
 		default:
+			dev_err(sdev->dev, "invalid SSP param %d\n", param_type);
 			break;
 		}
-		dev_err(sdev->dev, "fail to get SSP clk %d rate\n", clk_type);
 		break;
 	default:
 		/* not yet implemented for platforms other than the above */
@@ -2692,7 +2694,7 @@ const struct sof_ipc_tplg_ops ipc3_tplg_ops = {
 	.widget_free = sof_ipc3_widget_free,
 	.widget_setup = sof_ipc3_widget_setup,
 	.dai_config = sof_ipc3_dai_config,
-	.dai_get_clk = sof_ipc3_dai_get_clk,
+	.dai_get_param = sof_ipc3_dai_get_param,
 	.set_up_all_pipelines = sof_ipc3_set_up_all_pipelines,
 	.tear_down_all_pipelines = sof_ipc3_tear_down_all_pipelines,
 	.parse_manifest = sof_ipc3_parse_manifest,
diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c
index 4594470ed08b1..e0c2103a74fb2 100644
--- a/sound/soc/sof/ipc4-pcm.c
+++ b/sound/soc/sof/ipc4-pcm.c
@@ -650,7 +650,7 @@ static int sof_ipc4_pcm_dai_link_fixup(struct snd_soc_pcm_runtime *rtd,
 	struct snd_soc_dai *cpu_dai = snd_soc_rtd_to_cpu(rtd, 0);
 	struct sof_ipc4_audio_format *ipc4_fmt;
 	struct sof_ipc4_copier *ipc4_copier;
-	bool single_fmt = false;
+	bool single_bitdepth = false;
 	u32 valid_bits = 0;
 	int dir, ret;
 
@@ -682,18 +682,18 @@ static int sof_ipc4_pcm_dai_link_fixup(struct snd_soc_pcm_runtime *rtd,
 		return 0;
 
 	if (dir == SNDRV_PCM_STREAM_PLAYBACK) {
-		if (sof_ipc4_copier_is_single_format(sdev,
+		if (sof_ipc4_copier_is_single_bitdepth(sdev,
 			available_fmt->output_pin_fmts,
 			available_fmt->num_output_formats)) {
 			ipc4_fmt = &available_fmt->output_pin_fmts->audio_fmt;
-			single_fmt = true;
+			single_bitdepth = true;
 		}
 	} else {
-		if (sof_ipc4_copier_is_single_format(sdev,
+		if (sof_ipc4_copier_is_single_bitdepth(sdev,
 			available_fmt->input_pin_fmts,
 			available_fmt->num_input_formats)) {
 			ipc4_fmt = &available_fmt->input_pin_fmts->audio_fmt;
-			single_fmt = true;
+			single_bitdepth = true;
 		}
 	}
 }
@@ -703,7 +703,7 @@ static int sof_ipc4_pcm_dai_link_fixup(struct snd_soc_pcm_runtime *rtd,
 	if (ret)
 		return ret;
 
-	if (single_fmt) {
+	if (single_bitdepth) {
 		snd_mask_none(fmt);
 		valid_bits = SOF_IPC4_AUDIO_FORMAT_CFG_V_BIT_DEPTH(ipc4_fmt->fmt_cfg);
 		dev_dbg(component->dev, "Set %s to %d bit format\n", dai->name, valid_bits);
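The rename above narrows the helper's contract: the DAI link fixup may collapse the FE format mask only when every copier pin format shares one bit depth (rate and channels may still differ). A hedged sketch of that check with stand-in types rather than the SOF structs:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Stand-in for struct sof_ipc4_pin_format; only the valid sample bits
 * matter for this check. */
struct pin_fmt {
	unsigned int valid_bits;
};

/* Mirrors sof_ipc4_copier_is_single_bitdepth(): true only if all pin
 * formats share the first entry's bit depth, which is why the helper
 * was renamed from *_is_single_format. */
static bool is_single_bitdepth(const struct pin_fmt *fmts, size_t n)
{
	for (size_t i = 1; i < n; i++)
		if (fmts[i].valid_bits != fmts[0].valid_bits)
			return false;
	return true;
}

int main(void)
{
	struct pin_fmt fmts[] = { { 32 }, { 32 }, { 32 } };

	/* all 32-bit: the fixup may pin the FE to a single format */
	printf("%s\n", is_single_bitdepth(fmts, 3) ? "single" : "mixed");
	return 0;
}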
diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c
index 5f75aa09c21f9..8521cc9537541 100644
--- a/sound/soc/sof/ipc4-topology.c
+++ b/sound/soc/sof/ipc4-topology.c
@@ -195,9 +195,10 @@ static void sof_ipc4_dbg_audio_format(struct device *dev, struct sof_ipc4_pin_fo
 	for (i = 0; i < num_formats; i++) {
 		struct sof_ipc4_audio_format *fmt = &pin_fmt[i].audio_fmt;
 		dev_dbg(dev,
-			"Pin index #%d: %uHz, %ubit (ch_map %#x ch_cfg %u interleaving_style %u fmt_cfg %#x) buffer size %d\n",
-			pin_fmt[i].pin_index, fmt->sampling_frequency, fmt->bit_depth, fmt->ch_map,
-			fmt->ch_cfg, fmt->interleaving_style, fmt->fmt_cfg,
+			"Pin index #%d: %uHz, %ubit, %luch (ch_map %#x ch_cfg %u interleaving_style %u fmt_cfg %#x) buffer size %d\n",
+			pin_fmt[i].pin_index, fmt->sampling_frequency, fmt->bit_depth,
+			SOF_IPC4_AUDIO_FORMAT_CFG_CHANNELS_COUNT(fmt->fmt_cfg),
+			fmt->ch_map, fmt->ch_cfg, fmt->interleaving_style, fmt->fmt_cfg,
 			pin_fmt[i].buffer_size);
 	}
 }
@@ -594,7 +595,6 @@ static int sof_ipc4_widget_setup_comp_dai(struct snd_sof_widget *swidget)
 
 	switch (ipc4_copier->dai_type) {
 	case SOF_DAI_INTEL_ALH: {
-		struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp);
 		struct sof_ipc4_alh_configuration_blob *blob;
 		struct snd_soc_dapm_path *p;
 		struct snd_sof_widget *w;
@@ -1078,42 +1078,50 @@ static int sof_ipc4_widget_assign_instance_id(struct snd_sof_dev *sdev,
 
 /* update hw_params based on the audio stream format */
 static int sof_ipc4_update_hw_params(struct snd_sof_dev *sdev, struct snd_pcm_hw_params *params,
-				     struct sof_ipc4_audio_format *fmt)
+				     struct sof_ipc4_audio_format *fmt, u32 param_to_update)
 {
-	snd_pcm_format_t snd_fmt;
 	struct snd_interval *i;
-	struct snd_mask *m;
-	int valid_bits = SOF_IPC4_AUDIO_FORMAT_CFG_V_BIT_DEPTH(fmt->fmt_cfg);
-	unsigned int channels, rate;
 
-	switch (valid_bits) {
-	case 16:
-		snd_fmt = SNDRV_PCM_FORMAT_S16_LE;
-		break;
-	case 24:
-		snd_fmt = SNDRV_PCM_FORMAT_S24_LE;
-		break;
-	case 32:
-		snd_fmt = SNDRV_PCM_FORMAT_S32_LE;
-		break;
-	default:
-		dev_err(sdev->dev, "invalid PCM valid_bits %d\n", valid_bits);
-		return -EINVAL;
+	if (param_to_update & BIT(SNDRV_PCM_HW_PARAM_FORMAT)) {
+		int valid_bits = SOF_IPC4_AUDIO_FORMAT_CFG_V_BIT_DEPTH(fmt->fmt_cfg);
+		snd_pcm_format_t snd_fmt;
+		struct snd_mask *m;
+
+		switch (valid_bits) {
+		case 16:
+			snd_fmt = SNDRV_PCM_FORMAT_S16_LE;
+			break;
+		case 24:
+			snd_fmt = SNDRV_PCM_FORMAT_S24_LE;
+			break;
+		case 32:
+			snd_fmt = SNDRV_PCM_FORMAT_S32_LE;
+			break;
+		default:
+			dev_err(sdev->dev, "invalid PCM valid_bits %d\n", valid_bits);
+			return -EINVAL;
+		}
+
+		m = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
+		snd_mask_none(m);
+		snd_mask_set_format(m, snd_fmt);
 	}
 
-	m = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
-	snd_mask_none(m);
-	snd_mask_set_format(m, snd_fmt);
+	if (param_to_update & BIT(SNDRV_PCM_HW_PARAM_RATE)) {
+		unsigned int rate = fmt->sampling_frequency;
 
-	rate = fmt->sampling_frequency;
-	i = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE);
-	i->min = rate;
-	i->max = rate;
+		i = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE);
+		i->min = rate;
+		i->max = rate;
+	}
 
-	channels = SOF_IPC4_AUDIO_FORMAT_CFG_CHANNELS_COUNT(fmt->fmt_cfg);
-	i = hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS);
-	i->min = channels;
-	i->max = channels;
+	if (param_to_update & BIT(SNDRV_PCM_HW_PARAM_CHANNELS)) {
+		unsigned int channels = SOF_IPC4_AUDIO_FORMAT_CFG_CHANNELS_COUNT(fmt->fmt_cfg);
+
+		i = hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS);
+		i->min = channels;
+		i->max = channels;
+	}
 
 	return 0;
 }
@@ -1300,7 +1308,13 @@ static void sof_ipc4_unprepare_copier_module(struct snd_sof_widget *swidget)
 			ipc4_copier = dai->private;
 
 			if (pipeline->use_chain_dma) {
-				pipeline->msg.primary = 0;
+				/*
+				 * Preserve the DMA Link ID and clear other bits since
+				 * the DMA Link ID is only configured once during
+				 * dai_config, other fields are expected to be 0 for
+				 * re-configuration
+				 */
+				pipeline->msg.primary &= SOF_IPC4_GLB_CHAIN_DMA_LINK_ID_MASK;
 				pipeline->msg.extension = 0;
 			}
@@ -1371,13 +1385,16 @@ static int snd_sof_get_hw_config_params(struct snd_sof_dev *sdev, struct snd_sof
 	return 0;
 }
 
-static int snd_sof_get_nhlt_endpoint_data(struct snd_sof_dev *sdev, struct snd_sof_dai *dai,
-					  struct snd_pcm_hw_params *params, u32 dai_index,
-					  u32 linktype, u8 dir, u32 **dst, u32 *len)
+static int
+snd_sof_get_nhlt_endpoint_data(struct snd_sof_dev *sdev, struct snd_sof_dai *dai,
+			       bool single_bitdepth,
+			       struct snd_pcm_hw_params *params, u32 dai_index,
+			       u32 linktype, u8 dir, u32 **dst, u32 *len)
 {
 	struct sof_ipc4_fw_data *ipc4_data = sdev->private;
 	struct nhlt_specific_cfg *cfg;
 	int sample_rate, channel_count;
+	bool format_change = false;
 	int bit_depth, ret;
 	u32 nhlt_type;
 	int dev_type = 0;
@@ -1386,9 +1403,18 @@ static int snd_sof_get_nhlt_endpoint_data(struct snd_sof_dev *sdev, struct snd_s
 	switch (linktype) {
 	case SOF_DAI_INTEL_DMIC:
 		nhlt_type = NHLT_LINK_DMIC;
-		bit_depth = params_width(params);
 		channel_count = params_channels(params);
 		sample_rate = params_rate(params);
+		bit_depth = params_width(params);
+		/*
+		 * Look for 32-bit blob first instead of 16-bit if copier
+		 * supports multiple formats
+		 */
+		if (bit_depth == 16 && !single_bitdepth) {
+			dev_dbg(sdev->dev, "Looking for 32-bit blob first for DMIC\n");
+			format_change = true;
+			bit_depth = 32;
+		}
 		break;
 	case SOF_DAI_INTEL_SSP:
 		nhlt_type = NHLT_LINK_SSP;
@@ -1422,30 +1448,83 @@ static int snd_sof_get_nhlt_endpoint_data(struct snd_sof_dev *sdev, struct snd_s
 					   dir, dev_type);
 
 	if (!cfg) {
+		bool get_new_blob = false;
+
+		if (format_change) {
+			/*
+			 * The 32-bit blob was not found in NHLT table, try to
+			 * look for one based on the params
+			 */
+			bit_depth = params_width(params);
+			format_change = false;
+			get_new_blob = true;
+		} else if (linktype == SOF_DAI_INTEL_DMIC && !single_bitdepth) {
+			/*
+			 * The requested 32-bit blob (no format change for the
+			 * blob request) was not found in NHLT table, try to
+			 * look for 16-bit blob if the copier supports multiple
+			 * formats
+			 */
+			bit_depth = 16;
+			format_change = true;
+			get_new_blob = true;
+		}
+
+		if (get_new_blob) {
+			cfg = intel_nhlt_get_endpoint_blob(sdev->dev, ipc4_data->nhlt,
+							   dai_index, nhlt_type,
+							   bit_depth, bit_depth,
+							   channel_count, sample_rate,
+							   dir, dev_type);
+			if (cfg)
+				goto out;
+		}
+
 		dev_err(sdev->dev,
 			"no matching blob for sample rate: %d sample width: %d channels: %d\n",
 			sample_rate, bit_depth, channel_count);
 		return -EINVAL;
 	}
 
+out:
 	/* config length should be in dwords */
 	*len = cfg->size >> 2;
 	*dst = (u32 *)cfg->caps;
 
+	if (format_change) {
+		/*
+		 * Update the params to reflect that different blob was loaded
+		 * instead of the requested bit depth (16 -> 32 or 32 -> 16).
+		 * This information is going to be used by the caller to find
+		 * matching copier format on the dai side.
+		 */
+		struct snd_mask *m;
+
+		m = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
+		snd_mask_none(m);
+		if (bit_depth == 16)
+			snd_mask_set_format(m, SNDRV_PCM_FORMAT_S16_LE);
+		else
+			snd_mask_set_format(m, SNDRV_PCM_FORMAT_S32_LE);
+
+	}
+
 	return 0;
 }
 #else
-static int snd_sof_get_nhlt_endpoint_data(struct snd_sof_dev *sdev, struct snd_sof_dai *dai,
-					  struct snd_pcm_hw_params *params, u32 dai_index,
-					  u32 linktype, u8 dir, u32 **dst, u32 *len)
+static int
+snd_sof_get_nhlt_endpoint_data(struct snd_sof_dev *sdev, struct snd_sof_dai *dai,
+			       bool single_bitdepth,
+			       struct snd_pcm_hw_params *params, u32 dai_index,
+			       u32 linktype, u8 dir, u32 **dst, u32 *len)
 {
 	return 0;
 }
 #endif
 
-bool sof_ipc4_copier_is_single_format(struct snd_sof_dev *sdev,
-				      struct sof_ipc4_pin_format *pin_fmts,
-				      u32 pin_fmts_size)
+bool sof_ipc4_copier_is_single_bitdepth(struct snd_sof_dev *sdev,
+					struct sof_ipc4_pin_format *pin_fmts,
+					u32 pin_fmts_size)
 {
 	struct sof_ipc4_audio_format *fmt;
 	u32 valid_bits;
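The DMIC blob fallback above is easiest to read as a lookup order. A sketch under stated assumptions: lookup() is a stand-in for intel_nhlt_get_endpoint_blob() that only reports what would be queried, and find_dmic_blob() is an illustrative name:

#include <stdbool.h>
#include <stdio.h>

static bool lookup(int bits)
{
	printf("query NHLT for %d-bit DMIC blob\n", bits);
	return false;	/* always miss, so every fallback step prints */
}

/* Mirrors the retry logic in snd_sof_get_nhlt_endpoint_data(): a 16-bit
 * request on a multi-bitdepth copier prefers the 32-bit blob, then falls
 * back to the requested width; a miss at another width retries 16-bit. */
static bool find_dmic_blob(int req_bits, bool single_bitdepth)
{
	bool promoted = false;
	int bits = req_bits;

	if (bits == 16 && !single_bitdepth) {
		bits = 32;
		promoted = true;
	}
	if (lookup(bits))
		return true;

	if (promoted)
		bits = req_bits;	/* no 32-bit blob: use the params width */
	else if (!single_bitdepth)
		bits = 16;		/* e.g. 32-bit miss: try a 16-bit-only blob */
	else
		return false;

	return lookup(bits);
}

int main(void)
{
	find_dmic_blob(16, false);
	return 0;
}

When the fallback blob wins, the kernel code also rewrites the format mask in the params so the caller can pick the matching copier format on the DAI side.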
@@ -1468,6 +1547,105 @@ bool sof_ipc4_copier_is_single_format(struct snd_sof_dev *sdev,
 	return true;
 }
 
+static int
+sof_ipc4_adjust_params_to_dai_format(struct snd_sof_dev *sdev,
+				     struct snd_pcm_hw_params *params,
+				     struct sof_ipc4_pin_format *pin_fmts,
+				     u32 pin_fmts_size)
+{
+	u32 params_mask = BIT(SNDRV_PCM_HW_PARAM_RATE) |
+			  BIT(SNDRV_PCM_HW_PARAM_CHANNELS) |
+			  BIT(SNDRV_PCM_HW_PARAM_FORMAT);
+	struct sof_ipc4_audio_format *fmt;
+	u32 rate, channels, valid_bits;
+	int i;
+
+	fmt = &pin_fmts[0].audio_fmt;
+	rate = fmt->sampling_frequency;
+	channels = SOF_IPC4_AUDIO_FORMAT_CFG_CHANNELS_COUNT(fmt->fmt_cfg);
+	valid_bits = SOF_IPC4_AUDIO_FORMAT_CFG_V_BIT_DEPTH(fmt->fmt_cfg);
+
+	/* check if parameters in topology defined formats are the same */
+	for (i = 1; i < pin_fmts_size; i++) {
+		u32 val;
+
+		fmt = &pin_fmts[i].audio_fmt;
+
+		if (params_mask & BIT(SNDRV_PCM_HW_PARAM_RATE)) {
+			val = fmt->sampling_frequency;
+			if (val != rate)
+				params_mask &= ~BIT(SNDRV_PCM_HW_PARAM_RATE);
+		}
+		if (params_mask & BIT(SNDRV_PCM_HW_PARAM_CHANNELS)) {
+			val = SOF_IPC4_AUDIO_FORMAT_CFG_CHANNELS_COUNT(fmt->fmt_cfg);
+			if (val != channels)
+				params_mask &= ~BIT(SNDRV_PCM_HW_PARAM_CHANNELS);
+		}
+		if (params_mask & BIT(SNDRV_PCM_HW_PARAM_FORMAT)) {
+			val = SOF_IPC4_AUDIO_FORMAT_CFG_V_BIT_DEPTH(fmt->fmt_cfg);
+			if (val != valid_bits)
+				params_mask &= ~BIT(SNDRV_PCM_HW_PARAM_FORMAT);
+		}
+	}
+
+	if (params_mask)
+		return sof_ipc4_update_hw_params(sdev, params,
+						 &pin_fmts[0].audio_fmt,
+						 params_mask);
+
+	return 0;
+}
+
+static int
+sof_ipc4_prepare_dai_copier(struct snd_sof_dev *sdev, struct snd_sof_dai *dai,
+			    struct snd_pcm_hw_params *params, int dir)
+{
+	struct sof_ipc4_available_audio_format *available_fmt;
+	struct snd_pcm_hw_params dai_params = *params;
+	struct sof_ipc4_copier_data *copier_data;
+	struct sof_ipc4_pin_format *pin_fmts;
+	struct sof_ipc4_copier *ipc4_copier;
+	bool single_bitdepth;
+	u32 num_pin_fmts;
+	int ret;
+
+	ipc4_copier = dai->private;
+	copier_data = &ipc4_copier->data;
+	available_fmt = &ipc4_copier->available_fmt;
+
+	/*
+	 * Fixup the params based on the format parameters of the DAI. If any
+	 * of the RATE, CHANNELS, bit depth is static among the formats then
+	 * narrow the params to only allow that specific parameter value.
+	 */
+	if (dir == SNDRV_PCM_STREAM_PLAYBACK) {
+		pin_fmts = available_fmt->output_pin_fmts;
+		num_pin_fmts = available_fmt->num_output_formats;
+	} else {
+		pin_fmts = available_fmt->input_pin_fmts;
+		num_pin_fmts = available_fmt->num_input_formats;
+	}
+
+	ret = sof_ipc4_adjust_params_to_dai_format(sdev, &dai_params, pin_fmts,
+						   num_pin_fmts);
+	if (ret)
+		return ret;
+
+	single_bitdepth = sof_ipc4_copier_is_single_bitdepth(sdev, pin_fmts,
+							     num_pin_fmts);
+	ret = snd_sof_get_nhlt_endpoint_data(sdev, dai, single_bitdepth,
+					     &dai_params,
+					     ipc4_copier->dai_index,
+					     ipc4_copier->dai_type, dir,
+					     &ipc4_copier->copier_config,
+					     &copier_data->gtw_cfg.config_length);
+	/* Update the params to reflect the changes made in this function */
+	if (!ret)
+		*params = dai_params;
+
+	return ret;
+}
+
 static int
 sof_ipc4_prepare_copier_module(struct snd_sof_widget *swidget,
 			       struct snd_pcm_hw_params *fe_params,
@@ -1478,7 +1656,7 @@ sof_ipc4_prepare_copier_module,
 	struct snd_soc_component *scomp = swidget->scomp;
 	struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(scomp);
 	struct sof_ipc4_copier_data *copier_data;
-	struct snd_pcm_hw_params *ref_params;
+	struct snd_pcm_hw_params ref_params;
 	struct sof_ipc4_copier *ipc4_copier;
 	struct snd_sof_dai *dai;
 	u32 gtw_cfg_config_length;
@@ -1490,7 +1668,7 @@ sof_ipc4_prepare_copier_module,
 	u32 out_ref_rate, out_ref_channels;
 	u32 deep_buffer_dma_ms = 0;
 	int output_fmt_index;
-	bool single_output_format;
+	bool single_output_bitdepth;
 	int i;
 
 	dev_dbg(sdev->dev, "copier %s, type %d", swidget->widget->name, swidget->id);
@@ -1556,9 +1734,9 @@ sof_ipc4_prepare_copier_module,
 		 * for capture.
 		 */
 		if (dir == SNDRV_PCM_STREAM_PLAYBACK)
-			ref_params = fe_params;
+			ref_params = *fe_params;
 		else
-			ref_params = pipeline_params;
+			ref_params = *pipeline_params;
 
 		copier_data->gtw_cfg.node_id &= ~SOF_IPC4_NODE_INDEX_MASK;
 		copier_data->gtw_cfg.node_id |=
@@ -1584,23 +1762,25 @@ sof_ipc4_prepare_copier_module,
 		available_fmt = &ipc4_copier->available_fmt;
 
 		/*
-		 * When there is format conversion within a pipeline, the number of supported
-		 * output formats is typically limited to just 1 for the DAI copiers. But when there
-		 * is no format conversion, the DAI copiers input format must match that of the
-		 * FE hw_params for capture and the pipeline params for playback.
+		 * Use the fe_params as a base for the copier configuration.
+		 * The ref_params might get updated to reflect what format is
+		 * supported by the copier on the DAI side.
+		 *
+		 * In case of capture the ref_params returned will be used to
+		 * find the input configuration of the copier.
 		 */
-		if (dir == SNDRV_PCM_STREAM_PLAYBACK)
-			ref_params = pipeline_params;
-		else
-			ref_params = fe_params;
-
-		ret = snd_sof_get_nhlt_endpoint_data(sdev, dai, fe_params, ipc4_copier->dai_index,
-						     ipc4_copier->dai_type, dir,
-						     &ipc4_copier->copier_config,
-						     &copier_data->gtw_cfg.config_length);
+		ref_params = *fe_params;
+		ret = sof_ipc4_prepare_dai_copier(sdev, dai, &ref_params, dir);
 		if (ret < 0)
 			return ret;
 
+		/*
+		 * For playback the pipeline_params needs to be used to find the
+		 * input configuration of the copier.
+		 */
+		if (dir == SNDRV_PCM_STREAM_PLAYBACK)
+			ref_params = *pipeline_params;
+
 		break;
 	}
 	case snd_soc_dapm_buffer:
@@ -1608,7 +1788,7 @@ sof_ipc4_prepare_copier_module,
 		ipc4_copier = (struct sof_ipc4_copier *)swidget->private;
 		copier_data = &ipc4_copier->data;
 		available_fmt = &ipc4_copier->available_fmt;
-		ref_params = pipeline_params;
+		ref_params = *pipeline_params;
 
 		break;
 	}
@@ -1619,15 +1799,15 @@ sof_ipc4_prepare_copier_module,
 	}
 
 	/* set input and output audio formats */
-	ret = sof_ipc4_init_input_audio_fmt(sdev, swidget, &copier_data->base_config, ref_params,
-					    available_fmt);
+	ret = sof_ipc4_init_input_audio_fmt(sdev, swidget, &copier_data->base_config,
+					    &ref_params, available_fmt);
 	if (ret < 0)
 		return ret;
 
 	/* set the reference params for output format selection */
-	single_output_format = sof_ipc4_copier_is_single_format(sdev,
-								available_fmt->output_pin_fmts,
-								available_fmt->num_output_formats);
+	single_output_bitdepth = sof_ipc4_copier_is_single_bitdepth(sdev,
+								available_fmt->output_pin_fmts,
+								available_fmt->num_output_formats);
 	switch (swidget->id) {
 	case snd_soc_dapm_aif_in:
 	case snd_soc_dapm_dai_out:
@@ -1639,7 +1819,7 @@ sof_ipc4_prepare_copier_module,
 		out_ref_rate = in_fmt->sampling_frequency;
 		out_ref_channels = SOF_IPC4_AUDIO_FORMAT_CFG_CHANNELS_COUNT(in_fmt->fmt_cfg);
 
-		if (!single_output_format)
+		if (!single_output_bitdepth)
 			out_ref_valid_bits =
 				SOF_IPC4_AUDIO_FORMAT_CFG_V_BIT_DEPTH(in_fmt->fmt_cfg);
 		break;
@@ -1648,7 +1828,7 @@ sof_ipc4_prepare_copier_module,
 	case snd_soc_dapm_dai_in:
 		out_ref_rate = params_rate(fe_params);
 		out_ref_channels = params_channels(fe_params);
-		if (!single_output_format) {
+		if (!single_output_bitdepth) {
 			out_ref_valid_bits = sof_ipc4_get_valid_bits(sdev, fe_params);
 			if (out_ref_valid_bits < 0)
 				return out_ref_valid_bits;
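sof_ipc4_adjust_params_to_dai_format() above narrows a hw_params field only when every DAI-side pin format agrees on it, then hands exactly the surviving fields to sof_ipc4_update_hw_params() through the new mask argument. A hedged sketch of the narrowing rule, with illustrative types and flag names in place of the SOF structs and SNDRV_PCM_HW_PARAM_* constants:

#include <stddef.h>
#include <stdio.h>

struct dai_fmt {
	unsigned int rate, channels, valid_bits;
};

enum {
	FIX_RATE	= 1 << 0,
	FIX_CHANNELS	= 1 << 1,
	FIX_FORMAT	= 1 << 2,
};

/* A field survives only if all formats share format[0]'s value; the
 * caller then pins just the surviving fields in the hw_params. */
static unsigned int fields_to_pin(const struct dai_fmt *f, size_t n)
{
	unsigned int mask = FIX_RATE | FIX_CHANNELS | FIX_FORMAT;

	for (size_t i = 1; i < n; i++) {
		if (f[i].rate != f[0].rate)
			mask &= ~FIX_RATE;
		if (f[i].channels != f[0].channels)
			mask &= ~FIX_CHANNELS;
		if (f[i].valid_bits != f[0].valid_bits)
			mask &= ~FIX_FORMAT;
	}
	return mask;
}

int main(void)
{
	/* same rate and channels, mixed bit depth: only FORMAT is dropped */
	struct dai_fmt f[] = { { 48000, 2, 16 }, { 48000, 2, 32 } };

	printf("mask %#x\n", fields_to_pin(f, 2));	/* FIX_RATE | FIX_CHANNELS */
	return 0;
}

This is also why prepare_dai_copier() works on a local copy (dai_params) and copies it back only on success: the NHLT lookup may rewrite the format mask, and fe_params must not be clobbered on failure.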
@@ -1666,7 +1846,7 @@ sof_ipc4_prepare_copier_module,
 	 * if the output format is the same across all available output formats, choose
 	 * that as the reference.
 	 */
-	if (single_output_format) {
+	if (single_output_bitdepth) {
 		struct sof_ipc4_audio_format *out_fmt;
 
 		out_fmt = &available_fmt->output_pin_fmts[0].audio_fmt;
@@ -1803,7 +1983,11 @@ sof_ipc4_prepare_copier_module,
 	}
 
 	/* modify the input params for the next widget */
-	ret = sof_ipc4_update_hw_params(sdev, pipeline_params, &copier_data->out_format);
+	ret = sof_ipc4_update_hw_params(sdev, pipeline_params,
+					&copier_data->out_format,
+					BIT(SNDRV_PCM_HW_PARAM_FORMAT) |
+					BIT(SNDRV_PCM_HW_PARAM_CHANNELS) |
+					BIT(SNDRV_PCM_HW_PARAM_RATE));
 	if (ret)
 		return ret;
 
@@ -2028,7 +2212,10 @@ static int sof_ipc4_prepare_src_module(struct snd_sof_widget *swidget,
 	src->data.sink_rate = out_audio_fmt->sampling_frequency;
 
 	/* update pipeline_params for sink widgets */
-	return sof_ipc4_update_hw_params(sdev, pipeline_params, out_audio_fmt);
+	return sof_ipc4_update_hw_params(sdev, pipeline_params, out_audio_fmt,
+					 BIT(SNDRV_PCM_HW_PARAM_FORMAT) |
+					 BIT(SNDRV_PCM_HW_PARAM_CHANNELS) |
+					 BIT(SNDRV_PCM_HW_PARAM_RATE));
 }
 
 static int
@@ -2152,7 +2339,11 @@ static int sof_ipc4_prepare_process_module(struct snd_sof_widget *swidget,
 		       sizeof(struct sof_ipc4_audio_format));
 
 		/* modify the pipeline params with the pin 0 output format */
-		ret = sof_ipc4_update_hw_params(sdev, pipeline_params, &process->output_format);
+		ret = sof_ipc4_update_hw_params(sdev, pipeline_params,
+						&process->output_format,
+						BIT(SNDRV_PCM_HW_PARAM_FORMAT) |
+						BIT(SNDRV_PCM_HW_PARAM_CHANNELS) |
+						BIT(SNDRV_PCM_HW_PARAM_RATE));
 		if (ret)
 			return ret;
 	}
@@ -2634,7 +2825,7 @@ static void sof_ipc4_put_queue_id(struct snd_sof_widget *swidget, int queue_id,
 static int sof_ipc4_set_copier_sink_format(struct snd_sof_dev *sdev,
 					   struct snd_sof_widget *src_widget,
 					   struct snd_sof_widget *sink_widget,
-					   int sink_id)
+					   struct snd_sof_route *sroute)
 {
 	struct sof_ipc4_copier_config_set_sink_format format;
 	const struct sof_ipc_ops *iops = sdev->ipc->ops;
@@ -2643,9 +2834,6 @@ static int sof_ipc4_set_copier_sink_format(struct snd_sof_dev *sdev,
 	struct sof_ipc4_fw_module *fw_module;
 	struct sof_ipc4_msg msg = {{ 0 }};
 
-	dev_dbg(sdev->dev, "%s set copier sink %d format\n",
-		src_widget->widget->name, sink_id);
-
 	if (WIDGET_IS_DAI(src_widget->id)) {
 		struct snd_sof_dai *dai = src_widget->private;
 
@@ -2656,13 +2844,15 @@ static int sof_ipc4_set_copier_sink_format(struct snd_sof_dev *sdev,
 
 	fw_module = src_widget->module_info;
 
-	format.sink_id = sink_id;
+	format.sink_id = sroute->src_queue_id;
 	memcpy(&format.source_fmt, &src_config->audio_fmt, sizeof(format.source_fmt));
 
-	pin_fmt = sof_ipc4_get_input_pin_audio_fmt(sink_widget, sink_id);
+	pin_fmt = sof_ipc4_get_input_pin_audio_fmt(sink_widget, sroute->dst_queue_id);
 	if (!pin_fmt) {
-		dev_err(sdev->dev, "Unable to get pin %d format for %s",
-			sink_id, sink_widget->widget->name);
+		dev_err(sdev->dev,
+			"Failed to get input audio format of %s:%d for output of %s:%d\n",
+			sink_widget->widget->name, sroute->dst_queue_id,
+			src_widget->widget->name, sroute->src_queue_id);
 		return -EINVAL;
 	}
 
@@ -2720,7 +2910,8 @@ static int sof_ipc4_route_setup(struct snd_sof_dev *sdev, struct snd_sof_route *
 	sroute->src_queue_id = sof_ipc4_get_queue_id(src_widget, sink_widget,
 						     SOF_PIN_TYPE_OUTPUT);
 	if (sroute->src_queue_id < 0) {
-		dev_err(sdev->dev, "failed to get queue ID for source widget: %s\n",
+		dev_err(sdev->dev,
+			"failed to get src_queue_id ID from source widget %s\n",
 			src_widget->widget->name);
 		return sroute->src_queue_id;
 	}
@@ -2728,7 +2919,8 @@ static int sof_ipc4_route_setup(struct snd_sof_dev *sdev, struct snd_sof_route *
 	sroute->dst_queue_id = sof_ipc4_get_queue_id(src_widget, sink_widget,
 						     SOF_PIN_TYPE_INPUT);
 	if (sroute->dst_queue_id < 0) {
-		dev_err(sdev->dev, "failed to get queue ID for sink widget: %s\n",
+		dev_err(sdev->dev,
+			"failed to get dst_queue_id ID from sink widget %s\n",
 			sink_widget->widget->name);
 		sof_ipc4_put_queue_id(src_widget, sroute->src_queue_id,
@@ -2737,10 +2929,11 @@ static int sof_ipc4_route_setup(struct snd_sof_dev *sdev, struct snd_sof_route *
 
 	/* Pin 0 format is already set during copier module init */
 	if (sroute->src_queue_id > 0 && WIDGET_IS_COPIER(src_widget->id)) {
-		ret = sof_ipc4_set_copier_sink_format(sdev, src_widget, sink_widget,
-						      sroute->src_queue_id);
+		ret = sof_ipc4_set_copier_sink_format(sdev, src_widget,
+						      sink_widget, sroute);
 		if (ret < 0) {
-			dev_err(sdev->dev, "failed to set sink format for %s source queue ID %d\n",
+			dev_err(sdev->dev,
+				"failed to set sink format for source %s:%d\n",
 				src_widget->widget->name, sroute->src_queue_id);
 			goto out;
 		}
@@ -2858,8 +3051,14 @@ static int sof_ipc4_dai_config(struct snd_sof_dev *sdev, struct snd_sof_widget *
 		return 0;
 
 	if (pipeline->use_chain_dma) {
-		pipeline->msg.primary &= ~SOF_IPC4_GLB_CHAIN_DMA_LINK_ID_MASK;
-		pipeline->msg.primary |= SOF_IPC4_GLB_CHAIN_DMA_LINK_ID(data->dai_data);
+		/*
+		 * Only configure the DMA Link ID for ChainDMA when this op is
+		 * invoked with SOF_DAI_CONFIG_FLAGS_HW_PARAMS
+		 */
+		if (flags & SOF_DAI_CONFIG_FLAGS_HW_PARAMS) {
+			pipeline->msg.primary &= ~SOF_IPC4_GLB_CHAIN_DMA_LINK_ID_MASK;
+			pipeline->msg.primary |= SOF_IPC4_GLB_CHAIN_DMA_LINK_ID(data->dai_data);
+		}
 		return 0;
 	}
@@ -2960,7 +3159,7 @@ static int sof_ipc4_parse_manifest(struct snd_soc_component *scomp, int index,
 	return 0;
 }
 
-static int sof_ipc4_dai_get_clk(struct snd_sof_dev *sdev, struct snd_sof_dai *dai, int clk_type)
+static int sof_ipc4_dai_get_param(struct snd_sof_dev *sdev, struct snd_sof_dai *dai, int param_type)
 {
 	struct sof_ipc4_copier *ipc4_copier = dai->private;
 	struct snd_soc_tplg_hw_config *hw_config;
@@ -2999,13 +3198,15 @@
 
 	switch (ipc4_copier->dai_type) {
 	case SOF_DAI_INTEL_SSP:
-		switch (clk_type) {
-		case SOF_DAI_CLK_INTEL_SSP_MCLK:
+		switch (param_type) {
+		case SOF_DAI_PARAM_INTEL_SSP_MCLK:
 			return le32_to_cpu(hw_config->mclk_rate);
-		case SOF_DAI_CLK_INTEL_SSP_BCLK:
+		case SOF_DAI_PARAM_INTEL_SSP_BCLK:
 			return le32_to_cpu(hw_config->bclk_rate);
+		case SOF_DAI_PARAM_INTEL_SSP_TDM_SLOTS:
+			return le32_to_cpu(hw_config->tdm_slots);
 		default:
-			dev_err(sdev->dev, "Invalid clk type for SSP %d\n", clk_type);
+			dev_err(sdev->dev, "invalid SSP param %d\n", param_type);
 			break;
 		}
 		break;
@@ -3068,14 +3269,17 @@ static int sof_ipc4_link_setup(struct snd_sof_dev *sdev, struct snd_soc_dai_link
 	return 0;
 }
 
-static enum sof_tokens common_copier_token_list[] = {
+/* Tokens needed for different copier variants (aif, dai and buffer) */
+static enum sof_tokens copier_token_list[] = {
 	SOF_COMP_TOKENS,
+	SOF_COPIER_TOKENS,
 	SOF_AUDIO_FMT_NUM_TOKENS,
 	SOF_IN_AUDIO_FORMAT_TOKENS,
 	SOF_OUT_AUDIO_FORMAT_TOKENS,
-	SOF_COPIER_DEEP_BUFFER_TOKENS,
-	SOF_COPIER_TOKENS,
 	SOF_COMP_EXT_TOKENS,
+
+	SOF_COPIER_DEEP_BUFFER_TOKENS,	/* for AIF copier */
+	SOF_DAI_TOKENS,			/* for DAI copier */
 };
 
 static enum sof_tokens pipeline_token_list[] = {
@@ -3083,16 +3287,6 @@
 	SOF_PIPELINE_TOKENS,
 };
 
-static enum sof_tokens dai_token_list[] = {
-	SOF_COMP_TOKENS,
-	SOF_AUDIO_FMT_NUM_TOKENS,
-	SOF_IN_AUDIO_FORMAT_TOKENS,
-	SOF_OUT_AUDIO_FORMAT_TOKENS,
-	SOF_COPIER_TOKENS,
-	SOF_DAI_TOKENS,
-	SOF_COMP_EXT_TOKENS,
-};
-
 static enum sof_tokens pga_token_list[] = {
 	SOF_COMP_TOKENS,
 	SOF_GAIN_TOKENS,
@@ -3129,23 +3323,23 @@ static enum sof_tokens process_token_list[] = {
 static const struct sof_ipc_tplg_widget_ops tplg_ipc4_widget_ops[SND_SOC_DAPM_TYPE_COUNT] = {
 	[snd_soc_dapm_aif_in] = {sof_ipc4_widget_setup_pcm, sof_ipc4_widget_free_comp_pcm,
-				 common_copier_token_list, ARRAY_SIZE(common_copier_token_list),
+				 copier_token_list, ARRAY_SIZE(copier_token_list),
 				 NULL, sof_ipc4_prepare_copier_module,
 				 sof_ipc4_unprepare_copier_module},
 	[snd_soc_dapm_aif_out] = {sof_ipc4_widget_setup_pcm, sof_ipc4_widget_free_comp_pcm,
-				  common_copier_token_list, ARRAY_SIZE(common_copier_token_list),
+				  copier_token_list, ARRAY_SIZE(copier_token_list),
 				  NULL, sof_ipc4_prepare_copier_module,
 				  sof_ipc4_unprepare_copier_module},
 
 	[snd_soc_dapm_dai_in] = {sof_ipc4_widget_setup_comp_dai, sof_ipc4_widget_free_comp_dai,
-				 dai_token_list, ARRAY_SIZE(dai_token_list), NULL,
+				 copier_token_list, ARRAY_SIZE(copier_token_list), NULL,
 				 sof_ipc4_prepare_copier_module,
 				 sof_ipc4_unprepare_copier_module},
 	[snd_soc_dapm_dai_out] = {sof_ipc4_widget_setup_comp_dai, sof_ipc4_widget_free_comp_dai,
-				  dai_token_list, ARRAY_SIZE(dai_token_list), NULL,
+				  copier_token_list, ARRAY_SIZE(copier_token_list), NULL,
 				  sof_ipc4_prepare_copier_module,
 				  sof_ipc4_unprepare_copier_module},
 	[snd_soc_dapm_buffer] = {sof_ipc4_widget_setup_pcm, sof_ipc4_widget_free_comp_pcm,
-				 common_copier_token_list, ARRAY_SIZE(common_copier_token_list),
+				 copier_token_list, ARRAY_SIZE(copier_token_list),
 				 NULL, sof_ipc4_prepare_copier_module,
 				 sof_ipc4_unprepare_copier_module},
 
 	[snd_soc_dapm_scheduler] = {sof_ipc4_widget_setup_comp_pipeline,
@@ -3182,7 +3376,7 @@ const struct sof_ipc_tplg_ops ipc4_tplg_ops = {
 	.route_free = sof_ipc4_route_free,
 	.dai_config = sof_ipc4_dai_config,
 	.parse_manifest = sof_ipc4_parse_manifest,
-	.dai_get_clk = sof_ipc4_dai_get_clk,
+	.dai_get_param = sof_ipc4_dai_get_param,
 	.tear_down_all_pipelines = sof_ipc4_tear_down_all_pipelines,
 	.link_setup = sof_ipc4_link_setup,
 };
diff --git a/sound/soc/sof/ipc4-topology.h b/sound/soc/sof/ipc4-topology.h
index 6e33208a357f5..603cb4a2b925d 100644
--- a/sound/soc/sof/ipc4-topology.h
+++ b/sound/soc/sof/ipc4-topology.h
@@ -476,7 +476,7 @@ struct sof_ipc4_process {
 	u32 init_config;
 };
 
-bool sof_ipc4_copier_is_single_format(struct snd_sof_dev *sdev,
-				      struct sof_ipc4_pin_format *pin_fmts,
-				      u32 pin_fmts_size);
+bool sof_ipc4_copier_is_single_bitdepth(struct snd_sof_dev *sdev,
+					struct sof_ipc4_pin_format *pin_fmts,
+					u32 pin_fmts_size);
 #endif
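Two of the ipc4-topology hunks above cooperate on the ChainDMA message word: dai_config() writes the DMA Link ID only when called with SOF_DAI_CONFIG_FLAGS_HW_PARAMS, and unprepare now clears everything except that ID. A toy illustration of the bit discipline; the mask layout is made up for the example and is not the IPC4 ABI:

#include <stdint.h>
#include <stdio.h>

/* Made-up layout standing in for SOF_IPC4_GLB_CHAIN_DMA_LINK_ID_MASK. */
#define LINK_ID_MASK	0x00ff0000u
#define LINK_ID(x)	(((uint32_t)(x) << 16) & LINK_ID_MASK)

int main(void)
{
	uint32_t primary = 0;

	/* dai_config(HW_PARAMS): install the DMA Link ID once */
	primary &= ~LINK_ID_MASK;
	primary |= LINK_ID(3);

	/* prepare: other message fields get filled in */
	primary |= 0x000000abu;

	/* unprepare: keep only the link ID so re-configuration starts clean */
	primary &= LINK_ID_MASK;

	printf("primary = %#010x\n", primary);	/* 0x00030000 */
	return 0;
}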
diff --git a/sound/soc/sof/sof-audio.c b/sound/soc/sof/sof-audio.c
index 32fef64ef10dd..83a746585b1c7 100644
--- a/sound/soc/sof/sof-audio.c
+++ b/sound/soc/sof/sof-audio.c
@@ -978,7 +978,7 @@ struct snd_sof_dai *snd_sof_find_dai(struct snd_soc_component *scomp,
 	return NULL;
 }
 
-static int sof_dai_get_clk(struct snd_soc_pcm_runtime *rtd, int clk_type)
+static int sof_dai_get_param(struct snd_soc_pcm_runtime *rtd, int param_type)
 {
 	struct snd_soc_component *component =
 		snd_soc_rtdcom_lookup(rtd, SOF_AUDIO_PCM_DRV_NAME);
@@ -991,8 +991,8 @@ static int sof_dai_get_clk(struct snd_soc_pcm_runtime *rtd, int clk_type)
 	if (!dai)
 		return 0;
 
-	if (tplg_ops && tplg_ops->dai_get_clk)
-		return tplg_ops->dai_get_clk(sdev, dai, clk_type);
+	if (tplg_ops && tplg_ops->dai_get_param)
+		return tplg_ops->dai_get_param(sdev, dai, param_type);
 
 	return 0;
 }
@@ -1003,7 +1003,7 @@ static int sof_dai_get_clk(struct snd_soc_pcm_runtime *rtd, int clk_type)
  */
 int sof_dai_get_mclk(struct snd_soc_pcm_runtime *rtd)
 {
-	return sof_dai_get_clk(rtd, SOF_DAI_CLK_INTEL_SSP_MCLK);
+	return sof_dai_get_param(rtd, SOF_DAI_PARAM_INTEL_SSP_MCLK);
 }
 EXPORT_SYMBOL(sof_dai_get_mclk);
 
@@ -1013,6 +1013,16 @@ EXPORT_SYMBOL(sof_dai_get_mclk);
  */
 int sof_dai_get_bclk(struct snd_soc_pcm_runtime *rtd)
 {
-	return sof_dai_get_clk(rtd, SOF_DAI_CLK_INTEL_SSP_BCLK);
+	return sof_dai_get_param(rtd, SOF_DAI_PARAM_INTEL_SSP_BCLK);
 }
 EXPORT_SYMBOL(sof_dai_get_bclk);
+
+/*
+ * Helper to get SSP TDM slot number from a pcm_runtime.
+ * Return 0 if not exist.
+ */
+int sof_dai_get_tdm_slots(struct snd_soc_pcm_runtime *rtd)
+{
+	return sof_dai_get_param(rtd, SOF_DAI_PARAM_INTEL_SSP_TDM_SLOTS);
+}
+EXPORT_SYMBOL(sof_dai_get_tdm_slots);
diff --git a/sound/soc/sof/sof-audio.h b/sound/soc/sof/sof-audio.h
index 2aeb269af752a..2fe2ba429d92b 100644
--- a/sound/soc/sof/sof-audio.h
+++ b/sound/soc/sof/sof-audio.h
@@ -44,8 +44,9 @@
 #define WIDGET_IS_AIF_OR_DAI(id) (WIDGET_IS_DAI(id) || WIDGET_IS_AIF(id))
 #define WIDGET_IS_COPIER(id) (WIDGET_IS_AIF_OR_DAI(id) || (id) == snd_soc_dapm_buffer)
 
-#define SOF_DAI_CLK_INTEL_SSP_MCLK	0
-#define SOF_DAI_CLK_INTEL_SSP_BCLK	1
+#define SOF_DAI_PARAM_INTEL_SSP_MCLK		0
+#define SOF_DAI_PARAM_INTEL_SSP_BCLK		1
+#define SOF_DAI_PARAM_INTEL_SSP_TDM_SLOTS	2
 
 enum sof_widget_op {
 	SOF_WIDGET_PREPARE,
@@ -208,7 +209,7 @@ struct sof_ipc_tplg_widget_ops {
  * @widget_setup: Function pointer for setting up setup in the DSP
  * @widget_free: Function pointer for freeing widget in the DSP
  * @dai_config: Function pointer for sending DAI config IPC to the DSP
- * @dai_get_clk: Function pointer for getting the DAI clock setting
+ * @dai_get_param: Function pointer for getting the DAI parameter
 * @set_up_all_pipelines: Function pointer for setting up all topology pipelines
 * @tear_down_all_pipelines: Function pointer for tearing down all topology pipelines
 * @parse_manifest: Function pointer for ipc4 specific parsing of topology manifest
@@ -229,7 +230,7 @@ struct sof_ipc_tplg_ops {
 	int (*widget_free)(struct snd_sof_dev *sdev, struct snd_sof_widget *swidget);
 	int (*dai_config)(struct snd_sof_dev *sdev, struct snd_sof_widget *swidget,
 			  unsigned int flags, struct snd_sof_dai_config_data *data);
-	int (*dai_get_clk)(struct snd_sof_dev *sdev, struct snd_sof_dai *dai, int clk_type);
+	int (*dai_get_param)(struct snd_sof_dev *sdev, struct snd_sof_dai *dai, int param_type);
 	int (*set_up_all_pipelines)(struct snd_sof_dev *sdev, bool verify);
 	int (*tear_down_all_pipelines)(struct snd_sof_dev *sdev, bool verify);
 	int (*parse_manifest)(struct snd_soc_component *scomp, int index,
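A sketch of how a machine driver could consume the new sof_dai_get_tdm_slots() helper, modeled on the existing sof_dai_get_mclk()/sof_dai_get_bclk() callers; the hw_params wiring and the slot masks are illustrative assumptions, not taken from the patch:

#include <sound/pcm_params.h>
#include <sound/soc.h>
#include <sound/sof.h>

static int card_hw_params(struct snd_pcm_substream *substream,
			  struct snd_pcm_hw_params *params)
{
	struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
	struct snd_soc_dai *codec_dai = snd_soc_rtd_to_codec(rtd, 0);
	int slots = sof_dai_get_tdm_slots(rtd);

	/* 0 means the topology defines no TDM slot count; negative is an error */
	if (slots <= 0)
		return slots;

	/* constrain the codec to the slot count the SSP was configured with */
	return snd_soc_dai_set_tdm_slot(codec_dai, 0x3, 0x3, slots, 32);
}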
diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh
index a6991877e50cd..78d3fec8be242 100755
--- a/tools/testing/selftests/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/netfilter/nft_concat_range.sh
@@ -27,7 +27,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
 	net6_port_net6_port net_port_mac_proto_net"
 
 # Reported bugs, also described by TYPE_ variables below
-BUGS="flush_remove_add reload"
+BUGS="flush_remove_add reload net_port_proto_match"
 
 # List of possible paths to pktgen script from kernel tree for performance tests
 PKTGEN_SCRIPT_PATHS="
@@ -371,6 +371,22 @@ race_repeat	0
 
 perf_duration	0
 "
+TYPE_net_port_proto_match="
+display		net,port,proto
+type_spec	ipv4_addr . inet_service . inet_proto
+chain_spec	ip daddr . udp dport . meta l4proto
+dst		addr4 port proto
+src
+start		1
+count		9
+src_delta	9
+tools		sendip bash
+proto		udp
+
+race_repeat	0
+
+perf_duration	0
+"
 
 # Set template for all tests, types and rules are filled in depending on test
 set_template='
 flush ruleset
@@ -1543,6 +1559,64 @@ test_bug_reload() {
 	nft flush ruleset
 }
 
+# - add ranged element, check that packets match it
+# - delete element again, check it is gone
+test_bug_net_port_proto_match() {
+	setup veth send_"${proto}" set || return ${ksft_skip}
+	rstart=${start}
+
+	range_size=1
+	for i in $(seq 1 10); do
+		for j in $(seq 1 20) ; do
+			elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+
+			nft "add element inet filter test { $elem }" || return 1
+			nft "get element inet filter test { $elem }" | grep -q "$elem"
+			if [ $? -ne 0 ];then
+				local got=$(nft "get element inet filter test { $elem }")
+				err "post-add: should have returned $elem but got $got"
+				return 1
+			fi
+		done
+	done
+
+	# recheck after set was filled
+	for i in $(seq 1 10); do
+		for j in $(seq 1 20) ; do
+			elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+
+			nft "get element inet filter test { $elem }" | grep -q "$elem"
+			if [ $? -ne 0 ];then
+				local got=$(nft "get element inet filter test { $elem }")
+				err "post-fill: should have returned $elem but got $got"
+				return 1
+			fi
+		done
+	done
+
+	# random del and re-fetch
+	for i in $(seq 1 10); do
+		for j in $(seq 1 20) ; do
+			local rnd=$((RANDOM%10))
+			local got=""
+
+			elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+			if [ $rnd -gt 0 ];then
+				continue
+			fi
+
+			nft "delete element inet filter test { $elem }"
+			got=$(nft "get element inet filter test { $elem }" 2>/dev/null)
+			if [ $? -eq 0 ];then
+				err "post-delete: query for $elem returned $got instead of error."
+				return 1
+			fi
+		done
+	done
+
+	nft flush ruleset
+}
+
 test_reported_issues() {
 	eval test_bug_"${subtest}"
 }