From 6d41d4fe28724db16ca1016df0713a07e0cc7448 Mon Sep 17 00:00:00 2001 From: Dong Chenchen Date: Tue, 15 Aug 2023 22:18:34 +0800 Subject: [PATCH 001/661] net: xfrm: skip policies marked as dead while reinserting policies BUG: KASAN: slab-use-after-free in xfrm_policy_inexact_list_reinsert+0xb6/0x430 Read of size 1 at addr ffff8881051f3bf8 by task ip/668 CPU: 2 PID: 668 Comm: ip Not tainted 6.5.0-rc5-00182-g25aa0bebba72-dirty #64 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13 04/01/2014 Call Trace: dump_stack_lvl+0x72/0xa0 print_report+0xd0/0x620 kasan_report+0xb6/0xf0 xfrm_policy_inexact_list_reinsert+0xb6/0x430 xfrm_policy_inexact_insert_node.constprop.0+0x537/0x800 xfrm_policy_inexact_alloc_chain+0x23f/0x320 xfrm_policy_inexact_insert+0x6b/0x590 xfrm_policy_insert+0x3b1/0x480 xfrm_add_policy+0x23c/0x3c0 xfrm_user_rcv_msg+0x2d0/0x510 netlink_rcv_skb+0x10d/0x2d0 xfrm_netlink_rcv+0x49/0x60 netlink_unicast+0x3fe/0x540 netlink_sendmsg+0x528/0x970 sock_sendmsg+0x14a/0x160 ____sys_sendmsg+0x4fc/0x580 ___sys_sendmsg+0xef/0x160 __sys_sendmsg+0xf7/0x1b0 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x73/0xdd The root cause is: cpu 0 cpu1 xfrm_dump_policy xfrm_policy_walk list_move_tail xfrm_add_policy ... ... xfrm_policy_inexact_list_reinsert list_for_each_entry_reverse if (!policy->bydst_reinsert) //read non-existent policy xfrm_dump_policy_done xfrm_policy_walk_done list_del(&walk->walk.all); If dump_one_policy() returns err (triggered by netlink socket), xfrm_policy_walk() will move walk initialized by socket to list net->xfrm.policy_all. so this socket becomes visible in the global policy list. The head *walk can be traversed when users add policies with different prefixlen and trigger xfrm_policy node merge. The issue can also be triggered by policy list traversal while rehashing and flushing policies. It can be fixed by skip such "policies" with walk.dead set to 1. Fixes: 9cf545ebd591 ("xfrm: policy: store inexact policies in a tree ordered by destination address") Fixes: 12a169e7d8f4 ("ipsec: Put dumpers on the dump list") Signed-off-by: Dong Chenchen Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d6b405782b6361..113fb7e9cdaf20 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -851,7 +851,7 @@ static void xfrm_policy_inexact_list_reinsert(struct net *net, struct hlist_node *newpos = NULL; bool matches_s, matches_d; - if (!policy->bydst_reinsert) + if (policy->walk.dead || !policy->bydst_reinsert) continue; WARN_ON_ONCE(policy->family != family); @@ -1256,8 +1256,11 @@ static void xfrm_hash_rebuild(struct work_struct *work) struct xfrm_pol_inexact_bin *bin; u8 dbits, sbits; + if (policy->walk.dead) + continue; + dir = xfrm_policy_id2dir(policy->index); - if (policy->walk.dead || dir >= XFRM_POLICY_MAX) + if (dir >= XFRM_POLICY_MAX) continue; if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) { @@ -1823,9 +1826,11 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) again: list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) { + if (pol->walk.dead) + continue; + dir = xfrm_policy_id2dir(pol->index); - if (pol->walk.dead || - dir >= XFRM_POLICY_MAX || + if (dir >= XFRM_POLICY_MAX || pol->type != type) continue; @@ -1862,9 +1867,11 @@ int xfrm_dev_policy_flush(struct net *net, struct net_device *dev, again: list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) { + if (pol->walk.dead) + continue; + dir = xfrm_policy_id2dir(pol->index); - if (pol->walk.dead || - dir >= XFRM_POLICY_MAX || + if (dir >= XFRM_POLICY_MAX || pol->xdo.dev != dev) continue; From f7c4e3e5d4f6609b4725a97451948ca2e425379a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Sep 2023 13:23:03 +0000 Subject: [PATCH 002/661] xfrm: interface: use DEV_STATS_INC() syzbot/KCSAN reported data-races in xfrm whenever dev->stats fields are updated. It appears all of these updates can happen from multiple cpus. Adopt SMP safe DEV_STATS_INC() to update dev->stats fields. BUG: KCSAN: data-race in xfrmi_xmit / xfrmi_xmit read-write to 0xffff88813726b160 of 8 bytes by task 23986 on cpu 1: xfrmi_xmit+0x74e/0xb20 net/xfrm/xfrm_interface_core.c:583 __netdev_start_xmit include/linux/netdevice.h:4889 [inline] netdev_start_xmit include/linux/netdevice.h:4903 [inline] xmit_one net/core/dev.c:3544 [inline] dev_hard_start_xmit+0x11b/0x3f0 net/core/dev.c:3560 __dev_queue_xmit+0xeee/0x1de0 net/core/dev.c:4340 dev_queue_xmit include/linux/netdevice.h:3082 [inline] neigh_connected_output+0x231/0x2a0 net/core/neighbour.c:1581 neigh_output include/net/neighbour.h:542 [inline] ip_finish_output2+0x74a/0x850 net/ipv4/ip_output.c:230 ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:318 NF_HOOK_COND include/linux/netfilter.h:293 [inline] ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:432 dst_output include/net/dst.h:458 [inline] ip_local_out net/ipv4/ip_output.c:127 [inline] ip_send_skb+0x72/0xe0 net/ipv4/ip_output.c:1487 udp_send_skb+0x6a4/0x990 net/ipv4/udp.c:963 udp_sendmsg+0x1249/0x12d0 net/ipv4/udp.c:1246 inet_sendmsg+0x63/0x80 net/ipv4/af_inet.c:840 sock_sendmsg_nosec net/socket.c:730 [inline] sock_sendmsg net/socket.c:753 [inline] ____sys_sendmsg+0x37c/0x4d0 net/socket.c:2540 ___sys_sendmsg net/socket.c:2594 [inline] __sys_sendmmsg+0x269/0x500 net/socket.c:2680 __do_sys_sendmmsg net/socket.c:2709 [inline] __se_sys_sendmmsg net/socket.c:2706 [inline] __x64_sys_sendmmsg+0x57/0x60 net/socket.c:2706 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd read-write to 0xffff88813726b160 of 8 bytes by task 23987 on cpu 0: xfrmi_xmit+0x74e/0xb20 net/xfrm/xfrm_interface_core.c:583 __netdev_start_xmit include/linux/netdevice.h:4889 [inline] netdev_start_xmit include/linux/netdevice.h:4903 [inline] xmit_one net/core/dev.c:3544 [inline] dev_hard_start_xmit+0x11b/0x3f0 net/core/dev.c:3560 __dev_queue_xmit+0xeee/0x1de0 net/core/dev.c:4340 dev_queue_xmit include/linux/netdevice.h:3082 [inline] neigh_connected_output+0x231/0x2a0 net/core/neighbour.c:1581 neigh_output include/net/neighbour.h:542 [inline] ip_finish_output2+0x74a/0x850 net/ipv4/ip_output.c:230 ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:318 NF_HOOK_COND include/linux/netfilter.h:293 [inline] ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:432 dst_output include/net/dst.h:458 [inline] ip_local_out net/ipv4/ip_output.c:127 [inline] ip_send_skb+0x72/0xe0 net/ipv4/ip_output.c:1487 udp_send_skb+0x6a4/0x990 net/ipv4/udp.c:963 udp_sendmsg+0x1249/0x12d0 net/ipv4/udp.c:1246 inet_sendmsg+0x63/0x80 net/ipv4/af_inet.c:840 sock_sendmsg_nosec net/socket.c:730 [inline] sock_sendmsg net/socket.c:753 [inline] ____sys_sendmsg+0x37c/0x4d0 net/socket.c:2540 ___sys_sendmsg net/socket.c:2594 [inline] __sys_sendmmsg+0x269/0x500 net/socket.c:2680 __do_sys_sendmmsg net/socket.c:2709 [inline] __se_sys_sendmmsg net/socket.c:2706 [inline] __x64_sys_sendmmsg+0x57/0x60 net/socket.c:2706 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0x00000000000010d7 -> 0x00000000000010d8 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 23987 Comm: syz-executor.5 Not tainted 6.5.0-syzkaller-10885-g0468be89b3fa #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023 Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Steffen Klassert Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface_core.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index b864740846902d..e21cc71095bb27 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -380,8 +380,8 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err) skb->dev = dev; if (err) { - dev->stats.rx_errors++; - dev->stats.rx_dropped++; + DEV_STATS_INC(dev, rx_errors); + DEV_STATS_INC(dev, rx_dropped); return 0; } @@ -426,7 +426,6 @@ static int xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) { struct xfrm_if *xi = netdev_priv(dev); - struct net_device_stats *stats = &xi->dev->stats; struct dst_entry *dst = skb_dst(skb); unsigned int length = skb->len; struct net_device *tdev; @@ -473,7 +472,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) tdev = dst->dev; if (tdev == dev) { - stats->collisions++; + DEV_STATS_INC(dev, collisions); net_warn_ratelimited("%s: Local routing loop detected!\n", dev->name); goto tx_err_dst_release; @@ -512,13 +511,13 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) if (net_xmit_eval(err) == 0) { dev_sw_netstats_tx_add(dev, 1, length); } else { - stats->tx_errors++; - stats->tx_aborted_errors++; + DEV_STATS_INC(dev, tx_errors); + DEV_STATS_INC(dev, tx_aborted_errors); } return 0; tx_err_link_failure: - stats->tx_carrier_errors++; + DEV_STATS_INC(dev, tx_carrier_errors); dst_link_failure(skb); tx_err_dst_release: dst_release(dst); @@ -528,7 +527,6 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); - struct net_device_stats *stats = &xi->dev->stats; struct dst_entry *dst = skb_dst(skb); struct flowi fl; int ret; @@ -545,7 +543,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) dst = ip6_route_output(dev_net(dev), NULL, &fl.u.ip6); if (dst->error) { dst_release(dst); - stats->tx_carrier_errors++; + DEV_STATS_INC(dev, tx_carrier_errors); goto tx_err; } skb_dst_set(skb, dst); @@ -561,7 +559,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) fl.u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; rt = __ip_route_output_key(dev_net(dev), &fl.u.ip4); if (IS_ERR(rt)) { - stats->tx_carrier_errors++; + DEV_STATS_INC(dev, tx_carrier_errors); goto tx_err; } skb_dst_set(skb, &rt->dst); @@ -580,8 +578,8 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; tx_err: - stats->tx_errors++; - stats->tx_dropped++; + DEV_STATS_INC(dev, tx_errors); + DEV_STATS_INC(dev, tx_dropped); kfree_skb(skb); return NETDEV_TX_OK; } From 3e01d5254698ea3d18e09d96b974c762328352cd Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 17 Jul 2023 21:42:19 +0200 Subject: [PATCH 003/661] mtd: rawnand: marvell: Ensure program page operations are successful The NAND core complies with the ONFI specification, which itself mentions that after any program or erase operation, a status check should be performed to see whether the operation was finished *and* successful. The NAND core offers helpers to finish a page write (sending the "PAGE PROG" command, waiting for the NAND chip to be ready again, and checking the operation status). But in some cases, advanced controller drivers might want to optimize this and craft their own page write helper to leverage additional hardware capabilities, thus not always using the core facilities. Some drivers, like this one, do not use the core helper to finish a page write because the final cycles are automatically managed by the hardware. In this case, the additional care must be taken to manually perform the final status check. Let's read the NAND chip status at the end of the page write helper and return -EIO upon error. Cc: stable@vger.kernel.org Fixes: 02f26ecf8c77 ("mtd: nand: add reworked Marvell NAND controller driver") Reported-by: Aviram Dali Signed-off-by: Miquel Raynal Tested-by: Ravi Chandra Minnikanti Link: https://lore.kernel.org/linux-mtd/20230717194221.229778-1-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/marvell_nand.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index 2c94da7a3b3a45..b841a81cb12822 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -1165,6 +1165,7 @@ static int marvell_nfc_hw_ecc_hmg_do_write_page(struct nand_chip *chip, .ndcb[2] = NDCB2_ADDR5_PAGE(page), }; unsigned int oob_bytes = lt->spare_bytes + (raw ? lt->ecc_bytes : 0); + u8 status; int ret; /* NFCv2 needs more information about the operation being executed */ @@ -1198,7 +1199,18 @@ static int marvell_nfc_hw_ecc_hmg_do_write_page(struct nand_chip *chip, ret = marvell_nfc_wait_op(chip, PSEC_TO_MSEC(sdr->tPROG_max)); - return ret; + if (ret) + return ret; + + /* Check write status on the chip side */ + ret = nand_status_op(chip, &status); + if (ret) + return ret; + + if (status & NAND_STATUS_FAIL) + return -EIO; + + return 0; } static int marvell_nfc_hw_ecc_hmg_write_page_raw(struct nand_chip *chip, @@ -1627,6 +1639,7 @@ static int marvell_nfc_hw_ecc_bch_write_page(struct nand_chip *chip, int data_len = lt->data_bytes; int spare_len = lt->spare_bytes; int chunk, ret; + u8 status; marvell_nfc_select_target(chip, chip->cur_cs); @@ -1663,6 +1676,14 @@ static int marvell_nfc_hw_ecc_bch_write_page(struct nand_chip *chip, if (ret) return ret; + /* Check write status on the chip side */ + ret = nand_status_op(chip, &status); + if (ret) + return ret; + + if (status & NAND_STATUS_FAIL) + return -EIO; + return 0; } From 6792b7fce610bcd1cf3e07af3607fe7e2c38c1d8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 30 Aug 2023 17:00:34 +0200 Subject: [PATCH 004/661] mtd: physmap-core: Restore map_rom fallback When the exact mapping type driver was not available, the old physmap_of_core driver fell back to mapping the region as ROM. Unfortunately this feature was lost when the DT and pdata cases were merged. Revive this useful feature. Fixes: 642b1e8dbed7bbbf ("mtd: maps: Merge physmap_of.c into physmap-core.c") Signed-off-by: Geert Uytterhoeven Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/550e8c8c1da4c4baeb3d71ff79b14a18d4194f9e.1693407371.git.geert+renesas@glider.be --- drivers/mtd/maps/physmap-core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/mtd/maps/physmap-core.c b/drivers/mtd/maps/physmap-core.c index 78710fbc8e7f63..fc872133928247 100644 --- a/drivers/mtd/maps/physmap-core.c +++ b/drivers/mtd/maps/physmap-core.c @@ -551,6 +551,17 @@ static int physmap_flash_probe(struct platform_device *dev) if (info->probe_type) { info->mtds[i] = do_map_probe(info->probe_type, &info->maps[i]); + + /* Fall back to mapping region as ROM */ + if (!info->mtds[i] && IS_ENABLED(CONFIG_MTD_ROM) && + strcmp(info->probe_type, "map_rom")) { + dev_warn(&dev->dev, + "map_probe() failed for type %s\n", + info->probe_type); + + info->mtds[i] = do_map_probe("map_rom", + &info->maps[i]); + } } else { int j; From 9836a987860e33943945d4b257729a4f94eae576 Mon Sep 17 00:00:00 2001 From: Martin Kurbanov Date: Tue, 5 Sep 2023 17:56:37 +0300 Subject: [PATCH 005/661] mtd: spinand: micron: correct bitmask for ecc status Valid bitmask is 0x70 in the status register. Fixes: a508e8875e13 ("mtd: spinand: Add initial support for Micron MT29F2G01ABAGD") Signed-off-by: Martin Kurbanov Reviewed-by: Frieder Schrempf Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230905145637.139068-1-mmkurbanov@sberdevices.ru --- drivers/mtd/nand/spi/micron.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/spi/micron.c b/drivers/mtd/nand/spi/micron.c index 50b7295bc92226..12601bc4227a72 100644 --- a/drivers/mtd/nand/spi/micron.c +++ b/drivers/mtd/nand/spi/micron.c @@ -12,7 +12,7 @@ #define SPINAND_MFR_MICRON 0x2c -#define MICRON_STATUS_ECC_MASK GENMASK(7, 4) +#define MICRON_STATUS_ECC_MASK GENMASK(6, 4) #define MICRON_STATUS_ECC_NO_BITFLIPS (0 << 4) #define MICRON_STATUS_ECC_1TO3_BITFLIPS (1 << 4) #define MICRON_STATUS_ECC_4TO6_BITFLIPS (3 << 4) From 3e4bc23926b83c3c67e5f61ae8571602754131a6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Sep 2023 18:13:59 +0000 Subject: [PATCH 006/661] xfrm: fix a data-race in xfrm_gen_index() xfrm_gen_index() mutual exclusion uses net->xfrm.xfrm_policy_lock. This means we must use a per-netns idx_generator variable, instead of a static one. Alternative would be to use an atomic variable. syzbot reported: BUG: KCSAN: data-race in xfrm_sk_policy_insert / xfrm_sk_policy_insert write to 0xffffffff87005938 of 4 bytes by task 29466 on cpu 0: xfrm_gen_index net/xfrm/xfrm_policy.c:1385 [inline] xfrm_sk_policy_insert+0x262/0x640 net/xfrm/xfrm_policy.c:2347 xfrm_user_policy+0x413/0x540 net/xfrm/xfrm_state.c:2639 do_ipv6_setsockopt+0x1317/0x2ce0 net/ipv6/ipv6_sockglue.c:943 ipv6_setsockopt+0x57/0x130 net/ipv6/ipv6_sockglue.c:1012 rawv6_setsockopt+0x21e/0x410 net/ipv6/raw.c:1054 sock_common_setsockopt+0x61/0x70 net/core/sock.c:3697 __sys_setsockopt+0x1c9/0x230 net/socket.c:2263 __do_sys_setsockopt net/socket.c:2274 [inline] __se_sys_setsockopt net/socket.c:2271 [inline] __x64_sys_setsockopt+0x66/0x80 net/socket.c:2271 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd read to 0xffffffff87005938 of 4 bytes by task 29460 on cpu 1: xfrm_sk_policy_insert+0x13e/0x640 xfrm_user_policy+0x413/0x540 net/xfrm/xfrm_state.c:2639 do_ipv6_setsockopt+0x1317/0x2ce0 net/ipv6/ipv6_sockglue.c:943 ipv6_setsockopt+0x57/0x130 net/ipv6/ipv6_sockglue.c:1012 rawv6_setsockopt+0x21e/0x410 net/ipv6/raw.c:1054 sock_common_setsockopt+0x61/0x70 net/core/sock.c:3697 __sys_setsockopt+0x1c9/0x230 net/socket.c:2263 __do_sys_setsockopt net/socket.c:2274 [inline] __se_sys_setsockopt net/socket.c:2271 [inline] __x64_sys_setsockopt+0x66/0x80 net/socket.c:2271 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0x00006ad8 -> 0x00006b18 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 29460 Comm: syz-executor.1 Not tainted 6.5.0-rc5-syzkaller-00243-g9106536c1aa3 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023 Fixes: 1121994c803f ("netns xfrm: policy insertion in netns") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Steffen Klassert Cc: Herbert Xu Acked-by: Herbert Xu Signed-off-by: Steffen Klassert --- include/net/netns/xfrm.h | 1 + net/xfrm/xfrm_policy.c | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index bd7c3be4af5d7b..423b52eca908d9 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -50,6 +50,7 @@ struct netns_xfrm { struct list_head policy_all; struct hlist_head *policy_byidx; unsigned int policy_idx_hmask; + unsigned int idx_generator; struct hlist_head policy_inexact[XFRM_POLICY_MAX]; struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX]; unsigned int policy_count[XFRM_POLICY_MAX * 2]; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 113fb7e9cdaf20..99df2862bdc951 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1375,8 +1375,6 @@ EXPORT_SYMBOL(xfrm_policy_hash_rebuild); * of an absolute inpredictability of ordering of rules. This will not pass. */ static u32 xfrm_gen_index(struct net *net, int dir, u32 index) { - static u32 idx_generator; - for (;;) { struct hlist_head *list; struct xfrm_policy *p; @@ -1384,8 +1382,8 @@ static u32 xfrm_gen_index(struct net *net, int dir, u32 index) int found; if (!index) { - idx = (idx_generator | dir); - idx_generator += 8; + idx = (net->xfrm.idx_generator | dir); + net->xfrm.idx_generator += 8; } else { idx = index; index = 0; From 719606154c7033c068a5d4c1dc5f9163b814b3c8 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 13 Sep 2023 09:04:27 +0300 Subject: [PATCH 007/661] phy: mapphone-mdm6600: Fix runtime disable on probe Commit d644e0d79829 ("phy: mapphone-mdm6600: Fix PM error handling in phy_mdm6600_probe") caused a regression where we now unconditionally disable runtime PM at the end of the probe while it is only needed on errors. Cc: Ivaylo Dimitrov Cc: Merlijn Wajer Cc: Miaoqian Lin Cc: Pavel Machek Reviewed-by: Sebastian Reichel Fixes: d644e0d79829 ("phy: mapphone-mdm6600: Fix PM error handling in phy_mdm6600_probe") Signed-off-by: Tony Lindgren Link: https://lore.kernel.org/r/20230913060433.48373-1-tony@atomide.com Signed-off-by: Vinod Koul --- drivers/phy/motorola/phy-mapphone-mdm6600.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/phy/motorola/phy-mapphone-mdm6600.c b/drivers/phy/motorola/phy-mapphone-mdm6600.c index 1d567604b650d9..0147112f77b17e 100644 --- a/drivers/phy/motorola/phy-mapphone-mdm6600.c +++ b/drivers/phy/motorola/phy-mapphone-mdm6600.c @@ -627,10 +627,12 @@ static int phy_mdm6600_probe(struct platform_device *pdev) pm_runtime_put_autosuspend(ddata->dev); cleanup: - if (error < 0) + if (error < 0) { phy_mdm6600_device_power_off(ddata); - pm_runtime_disable(ddata->dev); - pm_runtime_dont_use_autosuspend(ddata->dev); + pm_runtime_disable(ddata->dev); + pm_runtime_dont_use_autosuspend(ddata->dev); + } + return error; } From b99e0ba9633af51638e5ee1668da2e33620c134f Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 13 Sep 2023 09:04:28 +0300 Subject: [PATCH 008/661] phy: mapphone-mdm6600: Fix runtime PM for remove Otherwise we will get an underflow on remove. Cc: Ivaylo Dimitrov Cc: Merlijn Wajer Cc: Pavel Machek Cc: Sebastian Reichel Fixes: f7f50b2a7b05 ("phy: mapphone-mdm6600: Add runtime PM support for n_gsm on USB suspend") Signed-off-by: Tony Lindgren Reviewed-by: Sebastian Reichel Link: https://lore.kernel.org/r/20230913060433.48373-2-tony@atomide.com Signed-off-by: Vinod Koul --- drivers/phy/motorola/phy-mapphone-mdm6600.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/motorola/phy-mapphone-mdm6600.c b/drivers/phy/motorola/phy-mapphone-mdm6600.c index 0147112f77b17e..df48b1becebe63 100644 --- a/drivers/phy/motorola/phy-mapphone-mdm6600.c +++ b/drivers/phy/motorola/phy-mapphone-mdm6600.c @@ -641,6 +641,7 @@ static void phy_mdm6600_remove(struct platform_device *pdev) struct phy_mdm6600 *ddata = platform_get_drvdata(pdev); struct gpio_desc *reset_gpio = ddata->ctrl_gpios[PHY_MDM6600_RESET]; + pm_runtime_get_noresume(ddata->dev); pm_runtime_dont_use_autosuspend(ddata->dev); pm_runtime_put_sync(ddata->dev); pm_runtime_disable(ddata->dev); From 3b384cc74b00b5ac21d18e4c1efc3c1da5300971 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 13 Sep 2023 09:04:29 +0300 Subject: [PATCH 009/661] phy: mapphone-mdm6600: Fix pinctrl_pm handling for sleep pins Looks like the driver sleep pins configuration is unusable. Adding the sleep pins causes the usb phy to not respond. We need to use the default pins in probe, and only set sleep pins at phy_mdm6600_device_power_off(). As the modem can also be booted to a serial port mode for firmware flashing, let's make the pin changes limited to probe and remove. For probe, we get the default pins automatically. We only need to set the sleep pins in phy_mdm6600_device_power_off() to prevent the modem from waking up because the gpio line glitches. If it turns out that we need a separate state for phy_mdm6600_power_on() and phy_mdm6600_power_off(), we can use the pinctrl idle state. Cc: Ivaylo Dimitrov Cc: Merlijn Wajer Cc: Pavel Machek Cc: Sebastian Reichel Fixes: 2ad2af081622 ("phy: mapphone-mdm6600: Improve phy related runtime PM calls") Signed-off-by: Tony Lindgren Reviewed-by: Sebastian Reichel Link: https://lore.kernel.org/r/20230913060433.48373-3-tony@atomide.com Signed-off-by: Vinod Koul --- drivers/phy/motorola/phy-mapphone-mdm6600.c | 29 +++++++++------------ 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/phy/motorola/phy-mapphone-mdm6600.c b/drivers/phy/motorola/phy-mapphone-mdm6600.c index df48b1becebe63..376d023a0aa909 100644 --- a/drivers/phy/motorola/phy-mapphone-mdm6600.c +++ b/drivers/phy/motorola/phy-mapphone-mdm6600.c @@ -122,16 +122,10 @@ static int phy_mdm6600_power_on(struct phy *x) { struct phy_mdm6600 *ddata = phy_get_drvdata(x); struct gpio_desc *enable_gpio = ddata->ctrl_gpios[PHY_MDM6600_ENABLE]; - int error; if (!ddata->enabled) return -ENODEV; - error = pinctrl_pm_select_default_state(ddata->dev); - if (error) - dev_warn(ddata->dev, "%s: error with default_state: %i\n", - __func__, error); - gpiod_set_value_cansleep(enable_gpio, 1); /* Allow aggressive PM for USB, it's only needed for n_gsm port */ @@ -160,11 +154,6 @@ static int phy_mdm6600_power_off(struct phy *x) gpiod_set_value_cansleep(enable_gpio, 0); - error = pinctrl_pm_select_sleep_state(ddata->dev); - if (error) - dev_warn(ddata->dev, "%s: error with sleep_state: %i\n", - __func__, error); - return 0; } @@ -456,6 +445,7 @@ static void phy_mdm6600_device_power_off(struct phy_mdm6600 *ddata) { struct gpio_desc *reset_gpio = ddata->ctrl_gpios[PHY_MDM6600_RESET]; + int error; ddata->enabled = false; phy_mdm6600_cmd(ddata, PHY_MDM6600_CMD_BP_SHUTDOWN_REQ); @@ -471,6 +461,17 @@ static void phy_mdm6600_device_power_off(struct phy_mdm6600 *ddata) } else { dev_err(ddata->dev, "Timed out powering down\n"); } + + /* + * Keep reset gpio high with padconf internal pull-up resistor to + * prevent modem from waking up during deeper SoC idle states. The + * gpio bank lines can have glitches if not in the always-on wkup + * domain. + */ + error = pinctrl_pm_select_sleep_state(ddata->dev); + if (error) + dev_warn(ddata->dev, "%s: error with sleep_state: %i\n", + __func__, error); } static void phy_mdm6600_deferred_power_on(struct work_struct *work) @@ -571,12 +572,6 @@ static int phy_mdm6600_probe(struct platform_device *pdev) ddata->dev = &pdev->dev; platform_set_drvdata(pdev, ddata); - /* Active state selected in phy_mdm6600_power_on() */ - error = pinctrl_pm_select_sleep_state(ddata->dev); - if (error) - dev_warn(ddata->dev, "%s: error with sleep_state: %i\n", - __func__, error); - error = phy_mdm6600_init_lines(ddata); if (error) return error; From f822899c28572a854f2c746da5ed707d752458ab Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 18 Aug 2023 15:55:38 +0200 Subject: [PATCH 010/661] arm64: dts: qcom: sa8775p: correct PMIC GPIO label in gpio-ranges There are several PMICs with GPIO nodes and one of the nodes referenced other's in gpio-ranges which could result in deferred-probes like: qcom-spmi-gpio c440000.spmi:pmic@2:gpio@8800: can't add gpio chip Reported-by: Brian Masney Closes: https://lore.kernel.org/all/ZN5KIlI+RDu92jsi@brian-x1/ Fixes: e5a893a7cec5 ("arm64: dts: qcom: sa8775p: add PMIC GPIO controller nodes") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Brian Masney Reviewed-by: Bartosz Golaszewski Tested-by: Bartosz Golaszewski Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230818135538.47481-1-krzysztof.kozlowski@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sa8775p-pmics.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sa8775p-pmics.dtsi b/arch/arm64/boot/dts/qcom/sa8775p-pmics.dtsi index 3c3b6287cd2748..eaa43f022a6549 100644 --- a/arch/arm64/boot/dts/qcom/sa8775p-pmics.dtsi +++ b/arch/arm64/boot/dts/qcom/sa8775p-pmics.dtsi @@ -173,7 +173,7 @@ compatible = "qcom,pmm8654au-gpio", "qcom,spmi-gpio"; reg = <0x8800>; gpio-controller; - gpio-ranges = <&pmm8654au_2_gpios 0 0 12>; + gpio-ranges = <&pmm8654au_1_gpios 0 0 12>; #gpio-cells = <2>; interrupt-controller; #interrupt-cells = <2>; From 2ca3e844e3f978c0dbc95072dbf379abfc4a27db Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 1 Sep 2023 10:18:11 +0200 Subject: [PATCH 011/661] arm64: dts: qcom: apq8096-db820c: fix missing clock populate Commit 704e26678c8d ("arm64: dts: qcom: apq8096-db820c: drop simple-bus from clocks") removed "simple-bus" compatible from "clocks" node, but one of the clocks - divclk1 - is a gpio-gate-clock, which does not have CLK_OF_DECLARE. This means it will not be instantiated if placed in some subnode. Move the clocks to the root node, so regular devices will be populated. Reported-by: Dmitry Baryshkov Closes: https://lore.kernel.org/all/CAA8EJprF==p87oN+RiwAiNeURF1JcHGfL2Ez5zxqYPRRbN-hhg@mail.gmail.com/ Cc: stable@vger.kernel.org Fixes: 704e26678c8d ("arm64: dts: qcom: apq8096-db820c: drop simple-bus from clocks") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Dmitry Baryshkov Tested-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20230901081812.19121-1-krzysztof.kozlowski@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/apq8096-db820c.dts | 32 ++++++++++----------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dts b/arch/arm64/boot/dts/qcom/apq8096-db820c.dts index 385b178314db95..3067a4091a7afb 100644 --- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dts +++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dts @@ -62,25 +62,23 @@ stdout-path = "serial0:115200n8"; }; - clocks { - divclk4: divclk4 { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <32768>; - clock-output-names = "divclk4"; + div1_mclk: divclk1 { + compatible = "gpio-gate-clock"; + pinctrl-0 = <&audio_mclk>; + pinctrl-names = "default"; + clocks = <&rpmcc RPM_SMD_DIV_CLK1>; + #clock-cells = <0>; + enable-gpios = <&pm8994_gpios 15 0>; + }; - pinctrl-names = "default"; - pinctrl-0 = <&divclk4_pin_a>; - }; + divclk4: divclk4 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + clock-output-names = "divclk4"; - div1_mclk: divclk1 { - compatible = "gpio-gate-clock"; - pinctrl-0 = <&audio_mclk>; - pinctrl-names = "default"; - clocks = <&rpmcc RPM_SMD_DIV_CLK1>; - #clock-cells = <0>; - enable-gpios = <&pm8994_gpios 15 0>; - }; + pinctrl-names = "default"; + pinctrl-0 = <&divclk4_pin_a>; }; gpio-keys { From 725f593692ceedeab639b661298955b6f9ba8ec3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 1 Sep 2023 10:18:12 +0200 Subject: [PATCH 012/661] arm64: dts: qcom: msm8996-xiaomi: fix missing clock populate Commit 338958e30c68 ("arm64: dts: qcom: msm8996-xiaomi: drop simple-bus from clocks") removed "simple-bus" compatible from "clocks" node, but one of the clocks - divclk1 - is a gpio-gate-clock, which does not have CLK_OF_DECLARE. This means it will not be instantiated if placed in some subnode. Move the clocks to the root node, so regular devices will be populated. Reported-by: Dmitry Baryshkov Closes: https://lore.kernel.org/all/CAA8EJprF==p87oN+RiwAiNeURF1JcHGfL2Ez5zxqYPRRbN-hhg@mail.gmail.com/ Cc: stable@vger.kernel.org Fixes: 338958e30c68 ("arm64: dts: qcom: msm8996-xiaomi: drop simple-bus from clocks") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20230901081812.19121-2-krzysztof.kozlowski@linaro.org Signed-off-by: Bjorn Andersson --- .../boot/dts/qcom/msm8996-xiaomi-common.dtsi | 32 +++++++++---------- .../boot/dts/qcom/msm8996-xiaomi-gemini.dts | 18 +++++------ 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/msm8996-xiaomi-common.dtsi b/arch/arm64/boot/dts/qcom/msm8996-xiaomi-common.dtsi index bcd2397eb37339..06f8ff624181fc 100644 --- a/arch/arm64/boot/dts/qcom/msm8996-xiaomi-common.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996-xiaomi-common.dtsi @@ -11,26 +11,24 @@ #include / { - clocks { - divclk1_cdc: divclk1 { - compatible = "gpio-gate-clock"; - clocks = <&rpmcc RPM_SMD_DIV_CLK1>; - #clock-cells = <0>; - enable-gpios = <&pm8994_gpios 15 GPIO_ACTIVE_HIGH>; + divclk1_cdc: divclk1 { + compatible = "gpio-gate-clock"; + clocks = <&rpmcc RPM_SMD_DIV_CLK1>; + #clock-cells = <0>; + enable-gpios = <&pm8994_gpios 15 GPIO_ACTIVE_HIGH>; - pinctrl-names = "default"; - pinctrl-0 = <&divclk1_default>; - }; + pinctrl-names = "default"; + pinctrl-0 = <&divclk1_default>; + }; - divclk4: divclk4 { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <32768>; - clock-output-names = "divclk4"; + divclk4: divclk4 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + clock-output-names = "divclk4"; - pinctrl-names = "default"; - pinctrl-0 = <&divclk4_pin_a>; - }; + pinctrl-names = "default"; + pinctrl-0 = <&divclk4_pin_a>; }; gpio-keys { diff --git a/arch/arm64/boot/dts/qcom/msm8996-xiaomi-gemini.dts b/arch/arm64/boot/dts/qcom/msm8996-xiaomi-gemini.dts index d1066edaea471d..f8e9d90afab000 100644 --- a/arch/arm64/boot/dts/qcom/msm8996-xiaomi-gemini.dts +++ b/arch/arm64/boot/dts/qcom/msm8996-xiaomi-gemini.dts @@ -20,16 +20,14 @@ qcom,pmic-id = <0x20009 0x2000a 0x00 0x00>; qcom,board-id = <31 0>; - clocks { - divclk2_haptics: divclk2 { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <32768>; - clock-output-names = "divclk2"; - - pinctrl-names = "default"; - pinctrl-0 = <&divclk2_pin_a>; - }; + divclk2_haptics: divclk2 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + clock-output-names = "divclk2"; + + pinctrl-names = "default"; + pinctrl-0 = <&divclk2_pin_a>; }; }; From b1841d8e51d26e9c104cc89805b949f1aec449ae Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Thu, 14 Sep 2023 11:51:57 +0200 Subject: [PATCH 013/661] MAINTAINERS: uDPU: make myself maintainer of it Vladimir is no longer at the company, so I am stepping up to maintain the Methode boards. Signed-off-by: Robert Marko Signed-off-by: Gregory CLEMENT --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 90f13281d29708..fe93c57bf1459d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13847,7 +13847,7 @@ F: Documentation/devicetree/bindings/media/amlogic,gx-vdec.yaml F: drivers/staging/media/meson/vdec/ METHODE UDPU SUPPORT -M: Vladimir Vid +M: Robert Marko S: Maintained F: arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts From 1122a9c2ebe02435014a0f40dcc50f4333286f0f Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Thu, 14 Sep 2023 11:51:58 +0200 Subject: [PATCH 014/661] MAINTAINERS: uDPU: add remaining Methode boards Methode also has eDPU which is based on uDPU, so add eDPU as well as the common uDPU DTSI. Signed-off-by: Robert Marko Signed-off-by: Gregory CLEMENT --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fe93c57bf1459d..46df534afb9357 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13849,7 +13849,8 @@ F: drivers/staging/media/meson/vdec/ METHODE UDPU SUPPORT M: Robert Marko S: Maintained -F: arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts +F: arch/arm64/boot/dts/marvell/armada-3720-eDPU.dts +F: arch/arm64/boot/dts/marvell/armada-3720-uDPU.* MHI BUS M: Manivannan Sadhasivam From cc9b364bb1d58d3dae270c7a931a8cc717dc2b3b Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 15 Sep 2023 19:20:41 +0800 Subject: [PATCH 015/661] xfrm6: fix inet6_dev refcount underflow problem There are race conditions that may lead to inet6_dev refcount underflow in xfrm6_dst_destroy() and rt6_uncached_list_flush_dev(). One of the refcount underflow bugs is shown below: (cpu 1) | (cpu 2) xfrm6_dst_destroy() | ... | in6_dev_put() | | rt6_uncached_list_flush_dev() ... | ... | in6_dev_put() rt6_uncached_list_del() | ... ... | xfrm6_dst_destroy() calls rt6_uncached_list_del() after in6_dev_put(), so rt6_uncached_list_flush_dev() has a chance to call in6_dev_put() again for the same inet6_dev. Fix it by moving in6_dev_put() after rt6_uncached_list_del() in xfrm6_dst_destroy(). Fixes: 510c321b5571 ("xfrm: reuse uncached_list to track xdsts") Signed-off-by: Zhang Changzhong Reviewed-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 188224a76685dd..45d0f9a8b28cd8 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -117,10 +117,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - if (likely(xdst->u.rt6.rt6i_idev)) - in6_dev_put(xdst->u.rt6.rt6i_idev); dst_destroy_metrics_generic(dst); rt6_uncached_list_del(&xdst->u.rt6); + if (likely(xdst->u.rt6.rt6i_idev)) + in6_dev_put(xdst->u.rt6.rt6i_idev); xfrm_dst_destroy(xdst); } From 2d3465a75c9f83684d17da6807423824bf260524 Mon Sep 17 00:00:00 2001 From: Adrien Thierry Date: Mon, 28 Aug 2023 11:23:50 -0400 Subject: [PATCH 016/661] phy: qcom-qmp-usb: initialize PCS_USB registers Currently, PCS_USB registers that have their initialization data in a pcs_usb_tbl table are never initialized. Fix that. Fixes: fc64623637da ("phy: qcom-qmp-combo,usb: add support for separate PCS_USB region") Signed-off-by: Adrien Thierry Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20230828152353.16529-2-athierry@redhat.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index 0130bb8e809afc..e49262ce6d91da 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1703,6 +1703,7 @@ static int qmp_usb_power_on(struct phy *phy) void __iomem *tx = qmp->tx; void __iomem *rx = qmp->rx; void __iomem *pcs = qmp->pcs; + void __iomem *pcs_usb = qmp->pcs_usb; void __iomem *status; unsigned int val; int ret; @@ -1726,6 +1727,9 @@ static int qmp_usb_power_on(struct phy *phy) qmp_usb_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); + if (pcs_usb) + qmp_usb_configure(pcs_usb, cfg->pcs_usb_tbl, cfg->pcs_usb_tbl_num); + if (cfg->has_pwrdn_delay) usleep_range(10, 20); From c599dc5cca4dd6a5c664e4a8837246e68a96cb4c Mon Sep 17 00:00:00 2001 From: Adrien Thierry Date: Mon, 28 Aug 2023 11:23:51 -0400 Subject: [PATCH 017/661] phy: qcom-qmp-usb: split PCS_USB init table for sc8280xp and sa8775p For sc8280xp and sa8775p, PCS and PCS_USB initialization data is described in the same table, thus the pcs_usb offset is not being applied during initialization of PCS_USB registers. Fix this by adding the appropriate pcs_usb_tbl tables. Fixes: 8bd2d6e11c99 ("phy: qcom-qmp: Add SA8775P USB3 UNI phy") Fixes: c0c7769cdae2 ("phy: qcom-qmp: Add SC8280XP USB3 UNI phy") Reviewed-by: Dmitry Baryshkov Signed-off-by: Adrien Thierry Link: https://lore.kernel.org/r/20230828152353.16529-3-athierry@redhat.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c index e49262ce6d91da..c69577601ae004 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c @@ -1112,8 +1112,6 @@ static const struct qmp_phy_init_tbl sc8280xp_usb3_uniphy_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03), QMP_PHY_INIT_CFG(QPHY_V5_PCS_RX_SIGDET_LVL, 0xaa), QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCS_TX_RX_CONFIG, 0x0c), - QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07), - QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8), QMP_PHY_INIT_CFG(QPHY_V5_PCS_CDR_RESET_TIME, 0x0a), QMP_PHY_INIT_CFG(QPHY_V5_PCS_ALIGN_DETECT_CONFIG1, 0x88), QMP_PHY_INIT_CFG(QPHY_V5_PCS_ALIGN_DETECT_CONFIG2, 0x13), @@ -1122,6 +1120,11 @@ static const struct qmp_phy_init_tbl sc8280xp_usb3_uniphy_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_REFGEN_REQ_CONFIG1, 0x21), }; +static const struct qmp_phy_init_tbl sc8280xp_usb3_uniphy_pcs_usb_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8), +}; + static const struct qmp_phy_init_tbl sa8775p_usb3_uniphy_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_LOCK_DETECT_CONFIG1, 0xc4), QMP_PHY_INIT_CFG(QPHY_V5_PCS_LOCK_DETECT_CONFIG2, 0x89), @@ -1131,9 +1134,6 @@ static const struct qmp_phy_init_tbl sa8775p_usb3_uniphy_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03), QMP_PHY_INIT_CFG(QPHY_V5_PCS_RX_SIGDET_LVL, 0xaa), QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCS_TX_RX_CONFIG, 0x0c), - QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07), - QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8), - QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_POWER_STATE_CONFIG1, 0x6f), QMP_PHY_INIT_CFG(QPHY_V5_PCS_CDR_RESET_TIME, 0x0a), QMP_PHY_INIT_CFG(QPHY_V5_PCS_ALIGN_DETECT_CONFIG1, 0x88), QMP_PHY_INIT_CFG(QPHY_V5_PCS_ALIGN_DETECT_CONFIG2, 0x13), @@ -1142,6 +1142,12 @@ static const struct qmp_phy_init_tbl sa8775p_usb3_uniphy_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_V5_PCS_REFGEN_REQ_CONFIG1, 0x21), }; +static const struct qmp_phy_init_tbl sa8775p_usb3_uniphy_pcs_usb_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8), + QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_POWER_STATE_CONFIG1, 0x6f), +}; + struct qmp_usb_offsets { u16 serdes; u16 pcs; @@ -1383,6 +1389,8 @@ static const struct qmp_phy_cfg sa8775p_usb3_uniphy_cfg = { .rx_tbl_num = ARRAY_SIZE(sc8280xp_usb3_uniphy_rx_tbl), .pcs_tbl = sa8775p_usb3_uniphy_pcs_tbl, .pcs_tbl_num = ARRAY_SIZE(sa8775p_usb3_uniphy_pcs_tbl), + .pcs_usb_tbl = sa8775p_usb3_uniphy_pcs_usb_tbl, + .pcs_usb_tbl_num = ARRAY_SIZE(sa8775p_usb3_uniphy_pcs_usb_tbl), .clk_list = qmp_v4_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), .reset_list = qcm2290_usb3phy_reset_l, @@ -1405,6 +1413,8 @@ static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = { .rx_tbl_num = ARRAY_SIZE(sc8280xp_usb3_uniphy_rx_tbl), .pcs_tbl = sc8280xp_usb3_uniphy_pcs_tbl, .pcs_tbl_num = ARRAY_SIZE(sc8280xp_usb3_uniphy_pcs_tbl), + .pcs_usb_tbl = sc8280xp_usb3_uniphy_pcs_usb_tbl, + .pcs_usb_tbl_num = ARRAY_SIZE(sc8280xp_usb3_uniphy_pcs_usb_tbl), .clk_list = qmp_v4_phy_clk_l, .num_clks = ARRAY_SIZE(qmp_v4_phy_clk_l), .reset_list = qcm2290_usb3phy_reset_l, From 211de9681195b92709b5d68e07af948b01c8bb9a Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 23 Aug 2023 21:17:28 +0300 Subject: [PATCH 018/661] dt-bindings: phy: qcom,ipq8074-qmp-pcie: fix warning regarding reg size Fix the 'reg is too long' warning caused by me adding 64-bit address and size to the example, while default being 32-bit (cell size equal to 1). Reported-by: Rob Herring Fixes: 505fb2541678 ("dt-bindings: phy: migrate QMP PCIe PHY bindings to qcom,sc8280xp-qmp-pcie-phy.yaml") Signed-off-by: Dmitry Baryshkov Acked-by: Rob Herring Link: https://lore.kernel.org/r/20230823181728.3082946-1-dmitry.baryshkov@linaro.org Signed-off-by: Vinod Koul --- .../devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml index 5073007267ad44..634cec5d57ea85 100644 --- a/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml @@ -70,7 +70,7 @@ examples: phy@84000 { compatible = "qcom,ipq6018-qmp-pcie-phy"; - reg = <0x0 0x00084000 0x0 0x1000>; + reg = <0x00084000 0x1000>; clocks = <&gcc GCC_PCIE0_AUX_CLK>, <&gcc GCC_PCIE0_AHB_CLK>, From 5f7cd740a6b657fba775bde744496e5ed21851ca Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 24 Aug 2023 17:13:45 +0800 Subject: [PATCH 019/661] phy: qcom: phy-qcom-m31: fix wrong pointer pass to PTR_ERR() It should be 'qphy->vreg' passed to PTR_ERR() when devm_regulator_get() fails. Fixes: 08e49af50701 ("phy: qcom: Introduce M31 USB PHY driver") Signed-off-by: Yang Yingliang Reviewed-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/20230824091345.1072650-1-yangyingliang@huawei.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-m31.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-m31.c b/drivers/phy/qualcomm/phy-qcom-m31.c index ed08072ca032ca..99d570f4142a42 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31.c +++ b/drivers/phy/qualcomm/phy-qcom-m31.c @@ -256,7 +256,7 @@ static int m31usb_phy_probe(struct platform_device *pdev) qphy->vreg = devm_regulator_get(dev, "vdda-phy"); if (IS_ERR(qphy->vreg)) - return dev_err_probe(dev, PTR_ERR(qphy->phy), + return dev_err_probe(dev, PTR_ERR(qphy->vreg), "failed to get vreg\n"); phy_set_drvdata(qphy->phy, qphy); From 426e05ce126e8febc21fae643139a1072d2670ad Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 24 Aug 2023 17:23:56 +0800 Subject: [PATCH 020/661] phy: qcom: phy-qcom-m31: change m31_ipq5332_regs to static m31_ipq5332_regs is only used in phy-qcom-m31.c now, change it to static. Signed-off-by: Yang Yingliang Reviewed-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/20230824092356.1154839-1-yangyingliang@huawei.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-m31.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-m31.c b/drivers/phy/qualcomm/phy-qcom-m31.c index 99d570f4142a42..014278e5428c9c 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31.c +++ b/drivers/phy/qualcomm/phy-qcom-m31.c @@ -82,7 +82,7 @@ struct m31_priv_data { unsigned int nregs; }; -struct m31_phy_regs m31_ipq5332_regs[] = { +static struct m31_phy_regs m31_ipq5332_regs[] = { { USB_PHY_CFG0, UTMI_PHY_OVERRIDE_EN, From 6ee8a9a772b5abd9a9791123ca7aee2dbf126d5f Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Fri, 1 Sep 2023 15:52:31 +0800 Subject: [PATCH 021/661] phy: realtek: usb: Drop unnecessary error check for debugfs_create_dir() Both debugfs_create_dir() and debugfs_create_file() return ERR_PTR and never return NULL. As Greg suggested, this patch removes the error checking for debugfs_create_dir in phy-rtk-usb2.c and phy-rtk-usb3.c. This is because the DebugFS kernel API is developed in a way that the caller can safely ignore the errors that occur during the creation of DebugFS nodes. The debugfs APIs have a IS_ERR() judge in start_creating() which can handle it gracefully. So these checks are unnecessary. Fixes: 134e6d25f6bd ("phy: realtek: usb: Add driver for the Realtek SoC USB 2.0 PHY") Fixes: adda6e82a7de ("phy: realtek: usb: Add driver for the Realtek SoC USB 3.0 PHY") Signed-off-by: Jinjie Ruan Suggested-by: Greg Kroah-Hartman Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20230901075231.1368947-1-ruanjinjie@huawei.com Signed-off-by: Vinod Koul --- drivers/phy/realtek/phy-rtk-usb2.c | 10 ++-------- drivers/phy/realtek/phy-rtk-usb3.c | 10 ++-------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/phy/realtek/phy-rtk-usb2.c b/drivers/phy/realtek/phy-rtk-usb2.c index 5e7ee060b404fa..aedc78bd37f733 100644 --- a/drivers/phy/realtek/phy-rtk-usb2.c +++ b/drivers/phy/realtek/phy-rtk-usb2.c @@ -853,17 +853,11 @@ static inline void create_debug_files(struct rtk_phy *rtk_phy) rtk_phy->debug_dir = debugfs_create_dir(dev_name(rtk_phy->dev), phy_debug_root); - if (!rtk_phy->debug_dir) - return; - if (!debugfs_create_file("parameter", 0444, rtk_phy->debug_dir, rtk_phy, - &rtk_usb2_parameter_fops)) - goto file_error; + debugfs_create_file("parameter", 0444, rtk_phy->debug_dir, rtk_phy, + &rtk_usb2_parameter_fops); return; - -file_error: - debugfs_remove_recursive(rtk_phy->debug_dir); } static inline void remove_debug_files(struct rtk_phy *rtk_phy) diff --git a/drivers/phy/realtek/phy-rtk-usb3.c b/drivers/phy/realtek/phy-rtk-usb3.c index 7881f908aadea8..dfb3122f3f114b 100644 --- a/drivers/phy/realtek/phy-rtk-usb3.c +++ b/drivers/phy/realtek/phy-rtk-usb3.c @@ -416,17 +416,11 @@ static inline void create_debug_files(struct rtk_phy *rtk_phy) return; rtk_phy->debug_dir = debugfs_create_dir(dev_name(rtk_phy->dev), phy_debug_root); - if (!rtk_phy->debug_dir) - return; - if (!debugfs_create_file("parameter", 0444, rtk_phy->debug_dir, rtk_phy, - &rtk_usb3_parameter_fops)) - goto file_error; + debugfs_create_file("parameter", 0444, rtk_phy->debug_dir, rtk_phy, + &rtk_usb3_parameter_fops); return; - -file_error: - debugfs_remove_recursive(rtk_phy->debug_dir); } static inline void remove_debug_files(struct rtk_phy *rtk_phy) From ecec1de5c58f8f3ab6959fcf8d68752eeb65311d Mon Sep 17 00:00:00 2001 From: Varadarajan Narayanan Date: Thu, 7 Sep 2023 12:20:52 +0530 Subject: [PATCH 022/661] phy: qcom: m31: Remove unwanted qphy->vreg is NULL check Fix the following Smatch complaint: drivers/phy/qualcomm/phy-qcom-m31.c:175 m31usb_phy_init() warn: variable dereferenced before check 'qphy->vreg' (see line 167) drivers/phy/qualcomm/phy-qcom-m31.c 166 167 ret = regulator_enable(qphy->vreg); ^^^^^^^^^^ Unchecked dereference 168 if (ret) { 169 dev_err(&phy->dev, "failed to enable regulator, %d\n", ret); 170 return ret; 171 } 172 173 ret = clk_prepare_enable(qphy->clk); 174 if (ret) { 175 if (qphy->vreg) ^^^^^^^^^^ Checked too late 176 regulator_disable(qphy->vreg); 177 dev_err(&phy->dev, "failed to enable cfg ahb clock, %d\n", ret); Since the phy will not get registered if qphy->vreg is NULL, this check is not needed. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-phy/cbd26132-c624-44b7-a073-73222b287338@moroto.mountain/T/#u Fixes: 08e49af50701 ("phy: qcom: Introduce M31 USB PHY driver") Signed-off-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/1694069452-3794-1-git-send-email-quic_varada@quicinc.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-m31.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-m31.c b/drivers/phy/qualcomm/phy-qcom-m31.c index 014278e5428c9c..5cb7e79b99b3f5 100644 --- a/drivers/phy/qualcomm/phy-qcom-m31.c +++ b/drivers/phy/qualcomm/phy-qcom-m31.c @@ -172,8 +172,7 @@ static int m31usb_phy_init(struct phy *phy) ret = clk_prepare_enable(qphy->clk); if (ret) { - if (qphy->vreg) - regulator_disable(qphy->vreg); + regulator_disable(qphy->vreg); dev_err(&phy->dev, "failed to enable cfg ahb clock, %d\n", ret); return ret; } From 112c23705c6dc59a05290c8e3e597e1b4e9c23fc Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 11 Sep 2023 22:07:14 +0200 Subject: [PATCH 023/661] phy: qcom-qmp-combo: Square out 8550 POWER_STATE_CONFIG1 There are two instances of the POWER_STATE_CONFIG1 register: one in the PCS space and another one in PCS_USB. The downstream init sequence pokes the latter one while we've been poking the former one (and misnamed it as the latter one, impostor!). Fix that up to avoid UB. Fixes: 49742e9edab3 ("phy: qcom-qmp-combo: Add support for SM8550") Reviewed-by: Abel Vesa Reviewed-by: Dmitry Baryshkov Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230829-topic-8550_usbphy-v3-1-34ec434194c5@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 2 +- drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v6.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index cbb28afce13541..8fd240dd5127a1 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -859,10 +859,10 @@ static const struct qmp_phy_init_tbl sm8550_usb3_pcs_tbl[] = { QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_PCS_TX_RX_CONFIG, 0x0c), QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_EQ_CONFIG1, 0x4b), QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_EQ_CONFIG5, 0x10), - QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_USB3_POWER_STATE_CONFIG1, 0x68), }; static const struct qmp_phy_init_tbl sm8550_usb3_pcs_usb_tbl[] = { + QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_USB3_POWER_STATE_CONFIG1, 0x68), QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8), QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07), QMP_PHY_INIT_CFG(QPHY_USB_V6_PCS_USB3_RCVR_DTCT_DLY_U3_L, 0x40), diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v6.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v6.h index 9510e63ba9d8ab..c38530d6776b4a 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v6.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-usb-v6.h @@ -12,7 +12,7 @@ #define QPHY_USB_V6_PCS_LOCK_DETECT_CONFIG3 0xcc #define QPHY_USB_V6_PCS_LOCK_DETECT_CONFIG6 0xd8 #define QPHY_USB_V6_PCS_REFGEN_REQ_CONFIG1 0xdc -#define QPHY_USB_V6_PCS_USB3_POWER_STATE_CONFIG1 0x90 +#define QPHY_USB_V6_PCS_POWER_STATE_CONFIG1 0x90 #define QPHY_USB_V6_PCS_RX_SIGDET_LVL 0x188 #define QPHY_USB_V6_PCS_RCVR_DTCT_DLY_P1U2_L 0x190 #define QPHY_USB_V6_PCS_RCVR_DTCT_DLY_P1U2_H 0x194 @@ -23,6 +23,7 @@ #define QPHY_USB_V6_PCS_EQ_CONFIG1 0x1dc #define QPHY_USB_V6_PCS_EQ_CONFIG5 0x1ec +#define QPHY_USB_V6_PCS_USB3_POWER_STATE_CONFIG1 0x00 #define QPHY_USB_V6_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL 0x18 #define QPHY_USB_V6_PCS_USB3_RXEQTRAINING_DFE_TIME_S2 0x3c #define QPHY_USB_V6_PCS_USB3_RCVR_DTCT_DLY_U3_L 0x40 From 76d20290d0c66a84a7a40c6231e73d1ab25994e5 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 11 Sep 2023 22:07:15 +0200 Subject: [PATCH 024/661] phy: qcom-qmp-combo: initialize PCS_USB registers Currently, PCS_USB registers that have their initialization data in a pcs_usb_tbl table are never initialized. Fix that. Fixes: fc64623637da ("phy: qcom-qmp-combo,usb: add support for separate PCS_USB region") Reported-by: Adrien Thierry Reviewed-by: Dmitry Baryshkov Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230829-topic-8550_usbphy-v3-2-34ec434194c5@linaro.org Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 8fd240dd5127a1..5e6fc8103e9d81 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -2555,6 +2555,7 @@ static int qmp_combo_usb_power_on(struct phy *phy) void __iomem *tx2 = qmp->tx2; void __iomem *rx2 = qmp->rx2; void __iomem *pcs = qmp->pcs; + void __iomem *pcs_usb = qmp->pcs_usb; void __iomem *status; unsigned int val; int ret; @@ -2576,6 +2577,9 @@ static int qmp_combo_usb_power_on(struct phy *phy) qmp_combo_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num); + if (pcs_usb) + qmp_combo_configure(pcs_usb, cfg->pcs_usb_tbl, cfg->pcs_usb_tbl_num); + if (cfg->has_pwrdn_delay) usleep_range(10, 20); From 11395c32f9e9e26f2f6281bd916a1161ba42ee6c Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Tue, 12 Sep 2023 07:46:46 -0400 Subject: [PATCH 025/661] phy: qualcomm: Fix typos in comments Fix typo in the description of the 'succesfully'. Signed-off-by: Bo Liu Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230912114646.8452-1-liubo03@inspur.com Signed-off-by: Vinod Koul --- drivers/phy/qualcomm/phy-qcom-apq8064-sata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c b/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c index 8814f4322adffb..3642a5d4f2f3b8 100644 --- a/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c +++ b/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c @@ -152,7 +152,7 @@ static int qcom_apq8064_sata_phy_init(struct phy *generic_phy) return ret; } - /* SATA phy calibrated succesfully, power up to functional mode */ + /* SATA phy calibrated successfully, power up to functional mode */ writel_relaxed(0x3E, base + SATA_PHY_POW_DWN_CTRL1); writel_relaxed(0x01, base + SATA_PHY_RX_IMCAL0); writel_relaxed(0x01, base + SATA_PHY_TX_IMCAL0); From 3a4a893dbb19e229db3b753f0462520b561dee98 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 17 Jul 2023 21:42:20 +0200 Subject: [PATCH 026/661] mtd: rawnand: arasan: Ensure program page operations are successful The NAND core complies with the ONFI specification, which itself mentions that after any program or erase operation, a status check should be performed to see whether the operation was finished *and* successful. The NAND core offers helpers to finish a page write (sending the "PAGE PROG" command, waiting for the NAND chip to be ready again, and checking the operation status). But in some cases, advanced controller drivers might want to optimize this and craft their own page write helper to leverage additional hardware capabilities, thus not always using the core facilities. Some drivers, like this one, do not use the core helper to finish a page write because the final cycles are automatically managed by the hardware. In this case, the additional care must be taken to manually perform the final status check. Let's read the NAND chip status at the end of the page write helper and return -EIO upon error. Cc: Michal Simek Cc: stable@vger.kernel.org Fixes: 88ffef1b65cf ("mtd: rawnand: arasan: Support the hardware BCH ECC engine") Signed-off-by: Miquel Raynal Acked-by: Michal Simek Link: https://lore.kernel.org/linux-mtd/20230717194221.229778-2-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/arasan-nand-controller.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/arasan-nand-controller.c b/drivers/mtd/nand/raw/arasan-nand-controller.c index 4621ec549cc79e..a492051c46f596 100644 --- a/drivers/mtd/nand/raw/arasan-nand-controller.c +++ b/drivers/mtd/nand/raw/arasan-nand-controller.c @@ -515,6 +515,7 @@ static int anfc_write_page_hw_ecc(struct nand_chip *chip, const u8 *buf, struct mtd_info *mtd = nand_to_mtd(chip); unsigned int len = mtd->writesize + (oob_required ? mtd->oobsize : 0); dma_addr_t dma_addr; + u8 status; int ret; struct anfc_op nfc_op = { .pkt_reg = @@ -561,10 +562,21 @@ static int anfc_write_page_hw_ecc(struct nand_chip *chip, const u8 *buf, } /* Spare data is not protected */ - if (oob_required) + if (oob_required) { ret = nand_write_oob_std(chip, page); + if (ret) + return ret; + } - return ret; + /* Check write status on the chip side */ + ret = nand_status_op(chip, &status); + if (ret) + return ret; + + if (status & NAND_STATUS_FAIL) + return -EIO; + + return 0; } static int anfc_sel_write_page_hw_ecc(struct nand_chip *chip, const u8 *buf, From 9777cc13fd2c3212618904636354be60835e10bb Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 17 Jul 2023 21:42:21 +0200 Subject: [PATCH 027/661] mtd: rawnand: pl353: Ensure program page operations are successful The NAND core complies with the ONFI specification, which itself mentions that after any program or erase operation, a status check should be performed to see whether the operation was finished *and* successful. The NAND core offers helpers to finish a page write (sending the "PAGE PROG" command, waiting for the NAND chip to be ready again, and checking the operation status). But in some cases, advanced controller drivers might want to optimize this and craft their own page write helper to leverage additional hardware capabilities, thus not always using the core facilities. Some drivers, like this one, do not use the core helper to finish a page write because the final cycles are automatically managed by the hardware. In this case, the additional care must be taken to manually perform the final status check. Let's read the NAND chip status at the end of the page write helper and return -EIO upon error. Cc: Michal Simek Cc: stable@vger.kernel.org Fixes: 08d8c62164a3 ("mtd: rawnand: pl353: Add support for the ARM PL353 SMC NAND controller") Signed-off-by: Miquel Raynal Tested-by: Michal Simek Link: https://lore.kernel.org/linux-mtd/20230717194221.229778-3-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/pl35x-nand-controller.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/mtd/nand/raw/pl35x-nand-controller.c b/drivers/mtd/nand/raw/pl35x-nand-controller.c index 8da5fee321b571..c506e92a3e457d 100644 --- a/drivers/mtd/nand/raw/pl35x-nand-controller.c +++ b/drivers/mtd/nand/raw/pl35x-nand-controller.c @@ -511,6 +511,7 @@ static int pl35x_nand_write_page_hwecc(struct nand_chip *chip, u32 addr1 = 0, addr2 = 0, row; u32 cmd_addr; int i, ret; + u8 status; ret = pl35x_smc_set_ecc_mode(nfc, chip, PL35X_SMC_ECC_CFG_MODE_APB); if (ret) @@ -563,6 +564,14 @@ static int pl35x_nand_write_page_hwecc(struct nand_chip *chip, if (ret) goto disable_ecc_engine; + /* Check write status on the chip side */ + ret = nand_status_op(chip, &status); + if (ret) + goto disable_ecc_engine; + + if (status & NAND_STATUS_FAIL) + ret = -EIO; + disable_ecc_engine: pl35x_smc_set_ecc_mode(nfc, chip, PL35X_SMC_ECC_CFG_MODE_BYPASS); From 5279f4a9eed3ee7d222b76511ea7a22c89e7eefd Mon Sep 17 00:00:00 2001 From: Bibek Kumar Patro Date: Wed, 13 Sep 2023 12:37:02 +0530 Subject: [PATCH 028/661] mtd: rawnand: qcom: Unmap the right resource upon probe failure We currently provide the physical address of the DMA region rather than the output of dma_map_resource() which is obviously wrong. Fixes: 7330fc505af4 ("mtd: rawnand: qcom: stop using phys_to_dma()") Cc: stable@vger.kernel.org Reviewed-by: Manivannan Sadhasivam Signed-off-by: Bibek Kumar Patro Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230913070702.12707-1-quic_bibekkum@quicinc.com --- drivers/mtd/nand/raw/qcom_nandc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index 64499c1b36032c..b079605c84d382 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -3444,7 +3444,7 @@ static int qcom_nandc_probe(struct platform_device *pdev) err_aon_clk: clk_disable_unprepare(nandc->core_clk); err_core_clk: - dma_unmap_resource(dev, res->start, resource_size(res), + dma_unmap_resource(dev, nandc->base_dma, resource_size(res), DMA_BIDIRECTIONAL, 0); return ret; } From 053d7dcd3440fa953ef4ca08de8e0a33d23d3b29 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:51:37 -0700 Subject: [PATCH 029/661] fbdev: mmp: Annotate struct mmphw_ctrl with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct mmphw_ctrl. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Signed-off-by: Helge Deller --- drivers/video/fbdev/mmp/hw/mmp_ctrl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/mmp/hw/mmp_ctrl.h b/drivers/video/fbdev/mmp/hw/mmp_ctrl.h index 167585a889d318..719b99a9bc7762 100644 --- a/drivers/video/fbdev/mmp/hw/mmp_ctrl.h +++ b/drivers/video/fbdev/mmp/hw/mmp_ctrl.h @@ -1406,7 +1406,7 @@ struct mmphw_ctrl { /*pathes*/ int path_num; - struct mmphw_path_plat path_plats[]; + struct mmphw_path_plat path_plats[] __counted_by(path_num); }; static inline int overlay_is_vid(struct mmp_overlay *overlay) From e34872523ca56b6a3ed7a5b06febdc66b4f16e3c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 22 Sep 2023 10:51:41 -0700 Subject: [PATCH 030/661] fbdev: mmp: Annotate struct mmp_path with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct mmp_path. [1] https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Kees Cook Signed-off-by: Helge Deller --- include/video/mmp_disp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/video/mmp_disp.h b/include/video/mmp_disp.h index 77252cb46361f6..a722dcbf5073da 100644 --- a/include/video/mmp_disp.h +++ b/include/video/mmp_disp.h @@ -231,7 +231,7 @@ struct mmp_path { /* layers */ int overlay_num; - struct mmp_overlay overlays[]; + struct mmp_overlay overlays[] __counted_by(overlay_num); }; extern struct mmp_path *mmp_get_path(const char *name); From c1a8d1d0edb71dec15c9649cb56866c71c1ecd9e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 21 Sep 2023 19:04:21 +0800 Subject: [PATCH 031/661] fbdev: atyfb: only use ioremap_uc() on i386 and ia64 ioremap_uc() is only meaningful on old x86-32 systems with the PAT extension, and on ia64 with its slightly unconventional ioremap() behavior, everywhere else this is the same as ioremap() anyway. Change the only driver that still references ioremap_uc() to only do so on x86-32/ia64 in order to allow removing that interface at some point in the future for the other architectures. On some architectures, ioremap_uc() just returns NULL, changing the driver to call ioremap() means that they now have a chance of working correctly. Signed-off-by: Arnd Bergmann Signed-off-by: Baoquan He Reviewed-by: Luis Chamberlain Cc: Helge Deller Cc: Thomas Zimmermann Cc: Christophe Leroy Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Signed-off-by: Helge Deller --- drivers/video/fbdev/aty/atyfb_base.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c index 5c87817a4f4ce6..3dcf83f5e7b4a1 100644 --- a/drivers/video/fbdev/aty/atyfb_base.c +++ b/drivers/video/fbdev/aty/atyfb_base.c @@ -3440,11 +3440,15 @@ static int atyfb_setup_generic(struct pci_dev *pdev, struct fb_info *info, } info->fix.mmio_start = raddr; +#if defined(__i386__) || defined(__ia64__) /* * By using strong UC we force the MTRR to never have an * effect on the MMIO region on both non-PAT and PAT systems. */ par->ati_regbase = ioremap_uc(info->fix.mmio_start, 0x1000); +#else + par->ati_regbase = ioremap(info->fix.mmio_start, 0x1000); +#endif if (par->ati_regbase == NULL) return -ENOMEM; From f87ef5723536a6545ed9c43e18b13a9faceb3c80 Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Fri, 1 Sep 2023 12:58:23 +0200 Subject: [PATCH 032/661] KVM: s390: fix gisa destroy operation might lead to cpu stalls A GISA cannot be destroyed as long it is linked in the GIB alert list as this would break the alert list. Just waiting for its removal from the list triggered by another vm is not sufficient as it might be the only vm. The below shown cpu stall situation might occur when GIB alerts are delayed and is fixed by calling process_gib_alert_list() instead of waiting. At this time the vcpus of the vm are already destroyed and thus no vcpu can be kicked to enter the SIE again if for some reason an interrupt is pending for that vm. Additionally the IAM restore value is set to 0x00. That would be a bug introduced by incomplete device de-registration, i.e. missing kvm_s390_gisc_unregister() call. Setting this value and the IAM in the GISA to 0x00 guarantees that late interrupts don't bring the GISA back into the alert list. CPU stall caused by kvm_s390_gisa_destroy(): [ 4915.311372] rcu: INFO: rcu_sched detected expedited stalls on CPUs/tasks: { 14-.... } 24533 jiffies s: 5269 root: 0x1/. [ 4915.311390] rcu: blocking rcu_node structures (internal RCU debug): l=1:0-15:0x4000/. [ 4915.311394] Task dump for CPU 14: [ 4915.311395] task:qemu-system-s39 state:R running task stack:0 pid:217198 ppid:1 flags:0x00000045 [ 4915.311399] Call Trace: [ 4915.311401] [<0000038003a33a10>] 0x38003a33a10 [ 4933.861321] rcu: INFO: rcu_sched self-detected stall on CPU [ 4933.861332] rcu: 14-....: (42008 ticks this GP) idle=53f4/1/0x4000000000000000 softirq=61530/61530 fqs=14031 [ 4933.861353] rcu: (t=42008 jiffies g=238109 q=100360 ncpus=18) [ 4933.861357] CPU: 14 PID: 217198 Comm: qemu-system-s39 Not tainted 6.5.0-20230816.rc6.git26.a9d17c5d8813.300.fc38.s390x #1 [ 4933.861360] Hardware name: IBM 8561 T01 703 (LPAR) [ 4933.861361] Krnl PSW : 0704e00180000000 000003ff804bfc66 (kvm_s390_gisa_destroy+0x3e/0xe0 [kvm]) [ 4933.861414] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 [ 4933.861416] Krnl GPRS: 0000000000000000 00000372000000fc 00000002134f8000 000000000d5f5900 [ 4933.861419] 00000002f5ea1d18 00000002f5ea1d18 0000000000000000 0000000000000000 [ 4933.861420] 00000002134fa890 00000002134f8958 000000000d5f5900 00000002134f8000 [ 4933.861422] 000003ffa06acf98 000003ffa06858b0 0000038003a33c20 0000038003a33bc8 [ 4933.861430] Krnl Code: 000003ff804bfc58: ec66002b007e cij %r6,0,6,000003ff804bfcae 000003ff804bfc5e: b904003a lgr %r3,%r10 #000003ff804bfc62: a7f40005 brc 15,000003ff804bfc6c >000003ff804bfc66: e330b7300204 lg %r3,10032(%r11) 000003ff804bfc6c: 58003000 l %r0,0(%r3) 000003ff804bfc70: ec03fffb6076 crj %r0,%r3,6,000003ff804bfc66 000003ff804bfc76: e320b7600271 lay %r2,10080(%r11) 000003ff804bfc7c: c0e5fffea339 brasl %r14,000003ff804942ee [ 4933.861444] Call Trace: [ 4933.861445] [<000003ff804bfc66>] kvm_s390_gisa_destroy+0x3e/0xe0 [kvm] [ 4933.861460] ([<00000002623523de>] free_unref_page+0xee/0x148) [ 4933.861507] [<000003ff804aea98>] kvm_arch_destroy_vm+0x50/0x120 [kvm] [ 4933.861521] [<000003ff8049d374>] kvm_destroy_vm+0x174/0x288 [kvm] [ 4933.861532] [<000003ff8049d4fe>] kvm_vm_release+0x36/0x48 [kvm] [ 4933.861542] [<00000002623cd04a>] __fput+0xea/0x2a8 [ 4933.861547] [<00000002620d5bf8>] task_work_run+0x88/0xf0 [ 4933.861551] [<00000002620b0aa6>] do_exit+0x2c6/0x528 [ 4933.861556] [<00000002620b0f00>] do_group_exit+0x40/0xb8 [ 4933.861557] [<00000002620b0fa6>] __s390x_sys_exit_group+0x2e/0x30 [ 4933.861559] [<0000000262d481f4>] __do_syscall+0x1d4/0x200 [ 4933.861563] [<0000000262d59028>] system_call+0x70/0x98 [ 4933.861565] Last Breaking-Event-Address: [ 4933.861566] [<0000038003a33b60>] 0x38003a33b60 Fixes: 9f30f6216378 ("KVM: s390: add gib_alert_irq_handler()") Signed-off-by: Michael Mueller Reviewed-by: Matthew Rosato Reviewed-by: Nico Boehr Link: https://lore.kernel.org/r/20230901105823.3973928-1-mimu@linux.ibm.com Message-ID: <20230901105823.3973928-1-mimu@linux.ibm.com> Signed-off-by: Claudio Imbrenda --- arch/s390/kvm/interrupt.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index c1b47d608a2b20..efaebba5ee19c7 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -303,11 +303,6 @@ static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi) return 0; } -static inline int gisa_in_alert_list(struct kvm_s390_gisa *gisa) -{ - return READ_ONCE(gisa->next_alert) != (u32)virt_to_phys(gisa); -} - static inline void gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) { set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); @@ -3216,11 +3211,12 @@ void kvm_s390_gisa_destroy(struct kvm *kvm) if (!gi->origin) return; - if (gi->alert.mask) - KVM_EVENT(3, "vm 0x%pK has unexpected iam 0x%02x", - kvm, gi->alert.mask); - while (gisa_in_alert_list(gi->origin)) - cpu_relax(); + WARN(gi->alert.mask != 0x00, + "unexpected non zero alert.mask 0x%02x", + gi->alert.mask); + gi->alert.mask = 0x00; + if (gisa_set_iam(gi->origin, gi->alert.mask)) + process_gib_alert_list(); hrtimer_cancel(&gi->timer); gi->origin = NULL; VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa); From b29a2acd36dd7a33c63f260df738fb96baa3d4f8 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Thu, 4 May 2023 14:00:42 +0200 Subject: [PATCH 033/661] KVM: x86/pmu: Truncate counter value to allowed width on write Performance counters are defined to have width less than 64 bits. The vPMU code maintains the counters in u64 variables but assumes the value to fit within the defined width. However, for Intel non-full-width counters (MSR_IA32_PERFCTRx) the value receieved from the guest is truncated to 32 bits and then sign-extended to full 64 bits. If a negative value is set, it's sign-extended to 64 bits, but then in kvm_pmu_incr_counter() it's incremented, truncated, and compared to the previous value for overflow detection. That previous value is not truncated, so it always evaluates bigger than the truncated new one, and a PMI is injected. If the PMI handler writes a negative counter value itself, the vCPU never quits the PMI loop. Turns out that Linux PMI handler actually does write the counter with the value just read with RDPMC, so when no full-width support is exposed via MSR_IA32_PERF_CAPABILITIES, and the guest initializes the counter to a negative value, it locks up. This has been observed in the field, for example, when the guest configures atop to use perfevents and runs two instances of it simultaneously. To address the problem, maintain the invariant that the counter value always fits in the defined bit width, by truncating the received value in the respective set_msr methods. For better readability, factor the out into a helper function, pmc_write_counter(), shared by vmx and svm parts. Fixes: 9cd803d496e7 ("KVM: x86: Update vPMCs when retiring instructions") Cc: stable@vger.kernel.org Signed-off-by: Roman Kagan Link: https://lore.kernel.org/all/20230504120042.785651-1-rkagan@amazon.de Tested-by: Like Xu [sean: tweak changelog, s/set/write in the helper] Signed-off-by: Sean Christopherson --- arch/x86/kvm/pmu.h | 6 ++++++ arch/x86/kvm/svm/pmu.c | 2 +- arch/x86/kvm/vmx/pmu_intel.c | 4 ++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 7d9ba301c09062..1d64113de4883e 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -74,6 +74,12 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc) return counter & pmc_bitmask(pmc); } +static inline void pmc_write_counter(struct kvm_pmc *pmc, u64 val) +{ + pmc->counter += val - pmc_read_counter(pmc); + pmc->counter &= pmc_bitmask(pmc); +} + static inline void pmc_release_perf_event(struct kvm_pmc *pmc) { if (pmc->perf_event) { diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index cef5a3d0abd096..373ff6a6687b3a 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -160,7 +160,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* MSR_PERFCTRn */ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER); if (pmc) { - pmc->counter += data - pmc_read_counter(pmc); + pmc_write_counter(pmc, data); pmc_update_sample_period(pmc); return 0; } diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index f2efa0bf7ae8d4..820d3e1f6b4f82 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -436,11 +436,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated && !(msr & MSR_PMC_FULL_WIDTH_BIT)) data = (s64)(s32)data; - pmc->counter += data - pmc_read_counter(pmc); + pmc_write_counter(pmc, data); pmc_update_sample_period(pmc); break; } else if ((pmc = get_fixed_pmc(pmu, msr))) { - pmc->counter += data - pmc_read_counter(pmc); + pmc_write_counter(pmc, data); pmc_update_sample_period(pmc); break; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { From a16eb25b09c02a54c1c1b449d4b6cfa2cf3f013a Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 25 Sep 2023 17:34:47 +0000 Subject: [PATCH 034/661] KVM: x86: Mask LVTPC when handling a PMI Per the SDM, "When the local APIC handles a performance-monitoring counters interrupt, it automatically sets the mask flag in the LVT performance counter register." Add this behavior to KVM's local APIC emulation. Failure to mask the LVTPC entry results in spurious PMIs, e.g. when running Linux as a guest, PMI handlers that do a "late_ack" spew a large number of "dazed and confused" spurious NMI warnings. Fixes: f5132b01386b ("KVM: Expose a version 2 architectural PMU to a guests") Cc: stable@vger.kernel.org Signed-off-by: Jim Mattson Tested-by: Mingwei Zhang Signed-off-by: Mingwei Zhang Link: https://lore.kernel.org/r/20230925173448.3518223-3-mizhang@google.com [sean: massage changelog, correct Fixes] Signed-off-by: Sean Christopherson --- arch/x86/kvm/lapic.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index dcd60b39e794d9..3e977dbbf9933d 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2759,13 +2759,17 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) { u32 reg = kvm_lapic_get_reg(apic, lvt_type); int vector, mode, trig_mode; + int r; if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { vector = reg & APIC_VECTOR_MASK; mode = reg & APIC_MODE_MASK; trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; - return __apic_accept_irq(apic, mode, vector, 1, trig_mode, - NULL); + + r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL); + if (r && lvt_type == APIC_LVTPC) + kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED); + return r; } return 0; } From 73554b29bd70546c1a9efc9c160641ef1b849358 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 25 Sep 2023 17:34:46 +0000 Subject: [PATCH 035/661] KVM: x86/pmu: Synthesize at most one PMI per VM-exit When the irq_work callback, kvm_pmi_trigger_fn(), is invoked during a VM-exit that also invokes __kvm_perf_overflow() as a result of instruction emulation, kvm_pmu_deliver_pmi() will be called twice before the next VM-entry. Calling kvm_pmu_deliver_pmi() twice is unlikely to be problematic now that KVM sets the LVTPC mask bit when delivering a PMI. But using IRQ work to trigger the PMI is still broken, albeit very theoretically. E.g. if the self-IPI to trigger IRQ work is be delayed long enough for the vCPU to be migrated to a different pCPU, then it's possible for kvm_pmi_trigger_fn() to race with the kvm_pmu_deliver_pmi() from KVM_REQ_PMI and still generate two PMIs. KVM could set the mask bit using an atomic operation, but that'd just be piling on unnecessary code to workaround what is effectively a hack. The *only* reason KVM uses IRQ work is to ensure the PMI is treated as a wake event, e.g. if the vCPU just executed HLT. Remove the irq_work callback for synthesizing a PMI, and all of the logic for invoking it. Instead, to prevent a vcpu from leaving C0 with a PMI pending, add a check for KVM_REQ_PMI to kvm_vcpu_has_events(). Fixes: 9cd803d496e7 ("KVM: x86: Update vPMCs when retiring instructions") Signed-off-by: Jim Mattson Tested-by: Mingwei Zhang Tested-by: Dapeng Mi Signed-off-by: Mingwei Zhang Link: https://lore.kernel.org/r/20230925173448.3518223-2-mizhang@google.com [sean: massage changelog] Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/pmu.c | 27 +-------------------------- arch/x86/kvm/x86.c | 3 +++ 3 files changed, 4 insertions(+), 27 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 17715cb8731d5d..70d139406bc80d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -528,7 +528,6 @@ struct kvm_pmu { u64 raw_event_mask; struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC]; struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED]; - struct irq_work irq_work; /* * Overlay the bitmap with a 64-bit atomic so that all bits can be diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index edb89b51b38381..9ae07db6f0f648 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -93,14 +93,6 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops) #undef __KVM_X86_PMU_OP } -static void kvm_pmi_trigger_fn(struct irq_work *irq_work) -{ - struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work); - struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); - - kvm_pmu_deliver_pmi(vcpu); -} - static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi) { struct kvm_pmu *pmu = pmc_to_pmu(pmc); @@ -124,20 +116,7 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi) __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); } - if (!pmc->intr || skip_pmi) - return; - - /* - * Inject PMI. If vcpu was in a guest mode during NMI PMI - * can be ejected on a guest mode re-entry. Otherwise we can't - * be sure that vcpu wasn't executing hlt instruction at the - * time of vmexit and is not going to re-enter guest mode until - * woken up. So we should wake it, but this is impossible from - * NMI context. Do it from irq work instead. - */ - if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu)) - irq_work_queue(&pmc_to_pmu(pmc)->irq_work); - else + if (pmc->intr && !skip_pmi) kvm_make_request(KVM_REQ_PMI, pmc->vcpu); } @@ -675,9 +654,6 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu) void kvm_pmu_reset(struct kvm_vcpu *vcpu) { - struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); - - irq_work_sync(&pmu->irq_work); static_call(kvm_x86_pmu_reset)(vcpu); } @@ -687,7 +663,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu) memset(pmu, 0, sizeof(*pmu)); static_call(kvm_x86_pmu_init)(vcpu); - init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn); pmu->event_count = 0; pmu->need_cleanup = false; kvm_pmu_refresh(vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9f18b06bbda66b..42a4e8f5e89a4e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12843,6 +12843,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) return true; #endif + if (kvm_test_request(KVM_REQ_PMI, vcpu)) + return true; + if (kvm_arch_interrupt_allowed(vcpu) && (kvm_cpu_has_interrupt(vcpu) || kvm_guest_apic_has_interrupt(vcpu))) From 1bbac8d6af085408885675c1e29b2581250be124 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 25 Aug 2023 15:55:02 +0200 Subject: [PATCH 036/661] dt-bindings: mmc: sdhci-msm: correct minimum number of clocks In the TXT binding before conversion, the "xo" clock was listed as optional. Conversion kept it optional in "clock-names", but not in "clocks". This fixes dbts_check warnings like: qcom-sdx65-mtp.dtb: mmc@8804000: clocks: [[13, 59], [13, 58]] is too short Cc: Fixes: a45537723f4b ("dt-bindings: mmc: sdhci-msm: Convert bindings to yaml") Signed-off-by: Krzysztof Kozlowski Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20230825135503.282135-1-krzysztof.kozlowski@linaro.org Signed-off-by: Ulf Hansson --- Documentation/devicetree/bindings/mmc/sdhci-msm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml b/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml index 80141eb7fc6bed..10f34aa8ba8ae5 100644 --- a/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml +++ b/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml @@ -69,7 +69,7 @@ properties: maxItems: 4 clocks: - minItems: 3 + minItems: 2 items: - description: Main peripheral bus clock, PCLK/HCLK - AHB Bus clock - description: SDC MMC clock, MCLK From 32a9cdb8869dc111a0c96cf8e1762be9684af15b Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Wed, 30 Aug 2023 17:39:22 +0800 Subject: [PATCH 037/661] mmc: core: sdio: hold retuning if sdio in 1-bit mode tuning only support in 4-bit mode or 8 bit mode, so in 1-bit mode, need to hold retuning. Find this issue when use manual tuning method on imx93. When system resume back, SDIO WIFI try to switch back to 4 bit mode, first will trigger retuning, and all tuning command failed. Signed-off-by: Haibo Chen Acked-by: Adrian Hunter Fixes: dfa13ebbe334 ("mmc: host: Add facility to support re-tuning") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230830093922.3095850-1-haibo.chen@nxp.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/sdio.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index f64b9ac76a5cdb..5914516df2f7fd 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -1089,8 +1089,14 @@ static int mmc_sdio_resume(struct mmc_host *host) } err = mmc_sdio_reinit_card(host); } else if (mmc_card_wake_sdio_irq(host)) { - /* We may have switched to 1-bit mode during suspend */ + /* + * We may have switched to 1-bit mode during suspend, + * need to hold retuning, because tuning only supprt + * 4-bit mode or 8 bit mode. + */ + mmc_retune_hold_now(host); err = sdio_enable_4bit_bus(host->card); + mmc_retune_release(host); } if (err) From 1202d617e3d04c8d27a14ef30784a698c48170b3 Mon Sep 17 00:00:00 2001 From: Sven van Ashbrook Date: Thu, 31 Aug 2023 16:00:56 +0000 Subject: [PATCH 038/661] mmc: sdhci-pci-gli: fix LPM negotiation so x86/S0ix SoCs can suspend To improve the r/w performance of GL9763E, the current driver inhibits LPM negotiation while the device is active. This prevents a large number of SoCs from suspending, notably x86 systems which commonly use S0ix as the suspend mechanism - for example, Intel Alder Lake and Raptor Lake processors. Failure description: 1. Userspace initiates s2idle suspend (e.g. via writing to /sys/power/state) 2. This switches the runtime_pm device state to active, which disables LPM negotiation, then calls the "regular" suspend callback 3. With LPM negotiation disabled, the bus cannot enter low-power state 4. On a large number of SoCs, if the bus not in a low-power state, S0ix cannot be entered, which in turn prevents the SoC from entering suspend. Fix by re-enabling LPM negotiation in the device's suspend callback. Suggested-by: Stanislaw Kardach Fixes: f9e5b33934ce ("mmc: host: Improve I/O read/write performance for GL9763E") Cc: stable@vger.kernel.org Signed-off-by: Sven van Ashbrook Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20230831160055.v3.1.I7ed1ca09797be2dd76ca914c57d88b32d24dac88@changeid Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-gli.c | 104 ++++++++++++++++++++----------- 1 file changed, 66 insertions(+), 38 deletions(-) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index ae8c307b7aa7b7..109d4b010f9786 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -1144,42 +1144,6 @@ static u32 sdhci_gl9750_readl(struct sdhci_host *host, int reg) return value; } -#ifdef CONFIG_PM_SLEEP -static int sdhci_pci_gli_resume(struct sdhci_pci_chip *chip) -{ - struct sdhci_pci_slot *slot = chip->slots[0]; - - pci_free_irq_vectors(slot->chip->pdev); - gli_pcie_enable_msi(slot); - - return sdhci_pci_resume_host(chip); -} - -static int sdhci_cqhci_gli_resume(struct sdhci_pci_chip *chip) -{ - struct sdhci_pci_slot *slot = chip->slots[0]; - int ret; - - ret = sdhci_pci_gli_resume(chip); - if (ret) - return ret; - - return cqhci_resume(slot->host->mmc); -} - -static int sdhci_cqhci_gli_suspend(struct sdhci_pci_chip *chip) -{ - struct sdhci_pci_slot *slot = chip->slots[0]; - int ret; - - ret = cqhci_suspend(slot->host->mmc); - if (ret) - return ret; - - return sdhci_suspend_host(slot->host); -} -#endif - static void gl9763e_hs400_enhanced_strobe(struct mmc_host *mmc, struct mmc_ios *ios) { @@ -1420,6 +1384,70 @@ static int gl9763e_runtime_resume(struct sdhci_pci_chip *chip) } #endif +#ifdef CONFIG_PM_SLEEP +static int sdhci_pci_gli_resume(struct sdhci_pci_chip *chip) +{ + struct sdhci_pci_slot *slot = chip->slots[0]; + + pci_free_irq_vectors(slot->chip->pdev); + gli_pcie_enable_msi(slot); + + return sdhci_pci_resume_host(chip); +} + +static int gl9763e_resume(struct sdhci_pci_chip *chip) +{ + struct sdhci_pci_slot *slot = chip->slots[0]; + int ret; + + ret = sdhci_pci_gli_resume(chip); + if (ret) + return ret; + + ret = cqhci_resume(slot->host->mmc); + if (ret) + return ret; + + /* + * Disable LPM negotiation to bring device back in sync + * with its runtime_pm state. + */ + gl9763e_set_low_power_negotiation(slot, false); + + return 0; +} + +static int gl9763e_suspend(struct sdhci_pci_chip *chip) +{ + struct sdhci_pci_slot *slot = chip->slots[0]; + int ret; + + /* + * Certain SoCs can suspend only with the bus in low- + * power state, notably x86 SoCs when using S0ix. + * Re-enable LPM negotiation to allow entering L1 state + * and entering system suspend. + */ + gl9763e_set_low_power_negotiation(slot, true); + + ret = cqhci_suspend(slot->host->mmc); + if (ret) + goto err_suspend; + + ret = sdhci_suspend_host(slot->host); + if (ret) + goto err_suspend_host; + + return 0; + +err_suspend_host: + cqhci_resume(slot->host->mmc); +err_suspend: + gl9763e_set_low_power_negotiation(slot, false); + return ret; +} +#endif + static int gli_probe_slot_gl9763e(struct sdhci_pci_slot *slot) { struct pci_dev *pdev = slot->chip->pdev; @@ -1527,8 +1555,8 @@ const struct sdhci_pci_fixes sdhci_gl9763e = { .probe_slot = gli_probe_slot_gl9763e, .ops = &sdhci_gl9763e_ops, #ifdef CONFIG_PM_SLEEP - .resume = sdhci_cqhci_gli_resume, - .suspend = sdhci_cqhci_gli_suspend, + .resume = gl9763e_resume, + .suspend = gl9763e_suspend, #endif #ifdef CONFIG_PM .runtime_suspend = gl9763e_runtime_suspend, From 168054ca5cf783e07518681fad0904de8dcc6b17 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 7 Sep 2023 12:54:50 +0300 Subject: [PATCH 039/661] mmc: sdhci-sprd: Fix error code in sdhci_sprd_tuning() Return an error code if sdhci_sprd_get_best_clk_sample() fails. Currently, it returns success. Fixes: d83d251bf3c2 ("mmc: sdhci-sprd: Add SD HS mode online tuning") Signed-off-by: Dan Carpenter Reviewed-by: Wenchao Chen Reviewed-by: Baolin Wang Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/a8af0a08-8405-43cc-bd83-85ff25f572ca@moroto.mountain Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-sprd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index 649ae075e22999..6b84ba27e6ab0d 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -644,6 +644,7 @@ static int sdhci_sprd_tuning(struct mmc_host *mmc, struct mmc_card *card, best_clk_sample = sdhci_sprd_get_best_clk_sample(mmc, value); if (best_clk_sample < 0) { dev_err(mmc_dev(host->mmc), "all tuning phase fail!\n"); + err = best_clk_sample; goto out; } From f19c5a73e6f78d69efce66cfdce31148c76a61a6 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 13 Sep 2023 13:29:21 +0200 Subject: [PATCH 040/661] mmc: core: Fix error propagation for some ioctl commands Userspace has currently no way of checking the internal R1 response error bits for some commands. This is a problem for some commands, like RPMB for example. Typically, we may detect that the busy completion has successfully ended, while in fact the card did not complete the requested operation. To fix the problem, let's always poll with CMD13 for these commands and during the polling, let's also aggregate the R1 response bits. Before completing the ioctl request, let's propagate the R1 response bits too. Reviewed-by: Avri Altman Co-developed-by: Christian Loehle Signed-off-by: Christian Loehle Signed-off-by: Ulf Hansson Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230913112921.553019-1-ulf.hansson@linaro.org --- drivers/mmc/core/block.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index b5b414a71e0bb7..3a8f27c3e310a5 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -179,6 +179,7 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, struct mmc_queue *mq); static void mmc_blk_hsq_req_done(struct mmc_request *mrq); static int mmc_spi_err_check(struct mmc_card *card); +static int mmc_blk_busy_cb(void *cb_data, bool *busy); static struct mmc_blk_data *mmc_blk_get(struct gendisk *disk) { @@ -470,7 +471,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, struct mmc_data data = {}; struct mmc_request mrq = {}; struct scatterlist sg; - bool r1b_resp, use_r1b_resp = false; + bool r1b_resp; unsigned int busy_timeout_ms; int err; unsigned int target_part; @@ -551,8 +552,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, busy_timeout_ms = idata->ic.cmd_timeout_ms ? : MMC_BLK_TIMEOUT_MS; r1b_resp = (cmd.flags & MMC_RSP_R1B) == MMC_RSP_R1B; if (r1b_resp) - use_r1b_resp = mmc_prepare_busy_cmd(card->host, &cmd, - busy_timeout_ms); + mmc_prepare_busy_cmd(card->host, &cmd, busy_timeout_ms); mmc_wait_for_req(card->host, &mrq); memcpy(&idata->ic.response, cmd.resp, sizeof(cmd.resp)); @@ -605,19 +605,28 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, if (idata->ic.postsleep_min_us) usleep_range(idata->ic.postsleep_min_us, idata->ic.postsleep_max_us); - /* No need to poll when using HW busy detection. */ - if ((card->host->caps & MMC_CAP_WAIT_WHILE_BUSY) && use_r1b_resp) - return 0; - if (mmc_host_is_spi(card->host)) { if (idata->ic.write_flag || r1b_resp || cmd.flags & MMC_RSP_SPI_BUSY) return mmc_spi_err_check(card); return err; } - /* Ensure RPMB/R1B command has completed by polling with CMD13. */ - if (idata->rpmb || r1b_resp) - err = mmc_poll_for_busy(card, busy_timeout_ms, false, - MMC_BUSY_IO); + + /* + * Ensure RPMB, writes and R1B responses are completed by polling with + * CMD13. Note that, usually we don't need to poll when using HW busy + * detection, but here it's needed since some commands may indicate the + * error through the R1 status bits. + */ + if (idata->rpmb || idata->ic.write_flag || r1b_resp) { + struct mmc_blk_busy_data cb_data = { + .card = card, + }; + + err = __mmc_poll_for_busy(card->host, 0, busy_timeout_ms, + &mmc_blk_busy_cb, &cb_data); + + idata->ic.response[0] = cb_data.status; + } return err; } From c7bb120c1c66672b657e95d0942c989b8275aeb3 Mon Sep 17 00:00:00 2001 From: Pablo Sun Date: Fri, 22 Sep 2023 17:53:48 +0800 Subject: [PATCH 041/661] mmc: mtk-sd: Use readl_poll_timeout_atomic in msdc_reset_hw Use atomic readl_poll_timeout_atomic, because msdc_reset_hw may be invoked in IRQ handler in the following context: msdc_irq() -> msdc_cmd_done() -> msdc_reset_hw() The following kernel BUG stack trace can be observed on Genio 1200 EVK after initializing MSDC1 hardware during kernel boot: [ 1.187441] BUG: scheduling while atomic: swapper/0/0/0x00010002 [ 1.189157] Modules linked in: [ 1.204633] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W 5.15.42-mtk+modified #1 [ 1.205713] Hardware name: MediaTek Genio 1200 EVK-P1V2-EMMC (DT) [ 1.206484] Call trace: [ 1.206796] dump_backtrace+0x0/0x1ac [ 1.207266] show_stack+0x24/0x30 [ 1.207692] dump_stack_lvl+0x68/0x84 [ 1.208162] dump_stack+0x1c/0x38 [ 1.208587] __schedule_bug+0x68/0x80 [ 1.209056] __schedule+0x6ec/0x7c0 [ 1.209502] schedule+0x7c/0x110 [ 1.209915] schedule_hrtimeout_range_clock+0xc4/0x1f0 [ 1.210569] schedule_hrtimeout_range+0x20/0x30 [ 1.211148] usleep_range_state+0x84/0xc0 [ 1.211661] msdc_reset_hw+0xc8/0x1b0 [ 1.212134] msdc_cmd_done.isra.0+0x4ac/0x5f0 [ 1.212693] msdc_irq+0x104/0x2d4 [ 1.213121] __handle_irq_event_percpu+0x68/0x280 [ 1.213725] handle_irq_event+0x70/0x15c [ 1.214230] handle_fasteoi_irq+0xb0/0x1a4 [ 1.214755] handle_domain_irq+0x6c/0x9c [ 1.215260] gic_handle_irq+0xc4/0x180 [ 1.215741] call_on_irq_stack+0x2c/0x54 [ 1.216245] do_interrupt_handler+0x5c/0x70 [ 1.216782] el1_interrupt+0x30/0x80 [ 1.217242] el1h_64_irq_handler+0x1c/0x2c [ 1.217769] el1h_64_irq+0x78/0x7c [ 1.218206] cpuidle_enter_state+0xc8/0x600 [ 1.218744] cpuidle_enter+0x44/0x5c [ 1.219205] do_idle+0x224/0x2d0 [ 1.219624] cpu_startup_entry+0x30/0x80 [ 1.220129] rest_init+0x108/0x134 [ 1.220568] arch_call_rest_init+0x1c/0x28 [ 1.221094] start_kernel+0x6c0/0x700 [ 1.221564] __primary_switched+0xc0/0xc8 Fixes: ffaea6ebfe9c ("mmc: mtk-sd: Use readl_poll_timeout instead of open-coded polling") Signed-off-by: Pablo Sun Reviewed-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230922095348.22182-1-pablo.sun@mediatek.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/mtk-sd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 5392200cfdf7a1..97f7c3d4be6ea9 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -669,11 +669,11 @@ static void msdc_reset_hw(struct msdc_host *host) u32 val; sdr_set_bits(host->base + MSDC_CFG, MSDC_CFG_RST); - readl_poll_timeout(host->base + MSDC_CFG, val, !(val & MSDC_CFG_RST), 0, 0); + readl_poll_timeout_atomic(host->base + MSDC_CFG, val, !(val & MSDC_CFG_RST), 0, 0); sdr_set_bits(host->base + MSDC_FIFOCS, MSDC_FIFOCS_CLR); - readl_poll_timeout(host->base + MSDC_FIFOCS, val, - !(val & MSDC_FIFOCS_CLR), 0, 0); + readl_poll_timeout_atomic(host->base + MSDC_FIFOCS, val, + !(val & MSDC_FIFOCS_CLR), 0, 0); val = readl(host->base + MSDC_INT); writel(val, host->base + MSDC_INT); From 48a3adcf47888019f953c57a7290036744635912 Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Wed, 20 Sep 2023 12:23:49 +0000 Subject: [PATCH 042/661] perf pmu: Fix perf stat output with correct scale and unit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The perf_pmu__parse_* functions for the sysfs files of pmu event’s scale, unit, per-pkg and snapshot were updated in commit 7b723dbb96e8 ("perf pmu: Be lazy about loading event info files from sysfs"). However, the paths for these sysfs files were incorrect. This resulted in perf stat reporting values with wrong scaling and missing units. This is fixed by correcting the paths for these sysfs files. Before this fix: $sudo perf stat -e power/energy-pkg/ -- sleep 2 Performance counter stats for 'system wide': 351,217,188,864 power/energy-pkg/ 2.004127961 seconds time elapsed After this fix: $sudo perf stat -e power/energy-pkg/ -- sleep 2 Performance counter stats for 'system wide': 80.58 Joules power/energy-pkg/ 2.004009749 seconds time elapsed Fixes: 7b723dbb96e8 ("perf pmu: Be lazy about loading event info files from sysfs") Signed-off-by: Wyes Karny Reviewed-by: Ian Rogers Cc: ravi.bangoria@amd.com Cc: sandipan.das@amd.com Cc: james.clark@arm.com Cc: kan.liang@linux.intel.com Link: https://lore.kernel.org/r/20230920122349.418673-1-wyes.karny@amd.com Signed-off-by: Namhyung Kim --- tools/perf/util/pmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 8de6f39abd1b6d..d515ba8a0e160c 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -295,7 +295,7 @@ static int perf_pmu__parse_scale(struct perf_pmu *pmu, struct perf_pmu_alias *al len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); if (!len) return 0; - scnprintf(path + len, sizeof(path) - len, "%s/%s.scale", pmu->name, alias->name); + scnprintf(path + len, sizeof(path) - len, "%s/events/%s.scale", pmu->name, alias->name); fd = open(path, O_RDONLY); if (fd == -1) @@ -330,7 +330,7 @@ static int perf_pmu__parse_unit(struct perf_pmu *pmu, struct perf_pmu_alias *ali len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); if (!len) return 0; - scnprintf(path + len, sizeof(path) - len, "%s/%s.unit", pmu->name, alias->name); + scnprintf(path + len, sizeof(path) - len, "%s/events/%s.unit", pmu->name, alias->name); fd = open(path, O_RDONLY); if (fd == -1) @@ -364,7 +364,7 @@ perf_pmu__parse_per_pkg(struct perf_pmu *pmu, struct perf_pmu_alias *alias) len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); if (!len) return 0; - scnprintf(path + len, sizeof(path) - len, "%s/%s.per-pkg", pmu->name, alias->name); + scnprintf(path + len, sizeof(path) - len, "%s/events/%s.per-pkg", pmu->name, alias->name); fd = open(path, O_RDONLY); if (fd == -1) @@ -385,7 +385,7 @@ static int perf_pmu__parse_snapshot(struct perf_pmu *pmu, struct perf_pmu_alias len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); if (!len) return 0; - scnprintf(path + len, sizeof(path) - len, "%s/%s.snapshot", pmu->name, alias->name); + scnprintf(path + len, sizeof(path) - len, "%s/events/%s.snapshot", pmu->name, alias->name); fd = open(path, O_RDONLY); if (fd == -1) From 84ee19bffc9306128cd0f1c650e89767079efeff Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Wed, 27 Sep 2023 10:15:00 +0300 Subject: [PATCH 043/661] mmc: core: Capture correct oemid-bits for eMMC cards The OEMID is an 8-bit binary number rather than 16-bit as the current code parses for. The OEMID occupies bits [111:104] in the CID register, see the eMMC spec JESD84-B51 paragraph 7.2.3. It seems that the 16-bit comes from the legacy MMC specs (v3.31 and before). Let's fix the parsing by simply move to use 8-bit instead of 16-bit. This means we ignore the impact on some of those old MMC cards that may be out there, but on the other hand this shouldn't be a problem as the OEMID seems not be an important feature for these cards. Signed-off-by: Avri Altman Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230927071500.1791882-1-avri.altman@wdc.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 89cd48fcec79f2..4a4bab9aa7263e 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -104,7 +104,7 @@ static int mmc_decode_cid(struct mmc_card *card) case 3: /* MMC v3.1 - v3.3 */ case 4: /* MMC v4 */ card->cid.manfid = UNSTUFF_BITS(resp, 120, 8); - card->cid.oemid = UNSTUFF_BITS(resp, 104, 16); + card->cid.oemid = UNSTUFF_BITS(resp, 104, 8); card->cid.prod_name[0] = UNSTUFF_BITS(resp, 96, 8); card->cid.prod_name[1] = UNSTUFF_BITS(resp, 88, 8); card->cid.prod_name[2] = UNSTUFF_BITS(resp, 80, 8); From 87d1888aa40f25773fa0b948bcb2545f97e2cb15 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Jun 2023 15:52:19 +0400 Subject: [PATCH 044/661] fs/ntfs3: Add ckeck in ni_update_parent() Check simple case when parent inode equals current inode. Signed-off-by: Konstantin Komarov --- fs/ntfs3/frecord.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 2b85cb10f0be40..d49fbb22bd5e8f 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -3208,6 +3208,12 @@ static bool ni_update_parent(struct ntfs_inode *ni, struct NTFS_DUP_INFO *dup, if (!fname || !memcmp(&fname->dup, dup, sizeof(fname->dup))) continue; + /* Check simple case when parent inode equals current inode. */ + if (ino_get(&fname->home) == ni->vfs_inode.i_ino) { + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + continue; + } + /* ntfs_iget5 may sleep. */ dir = ntfs_iget5(sb, &fname->home, NULL); if (IS_ERR(dir)) { From 06ccfb00645990a9fcc14249e6d1c25921ecb836 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Jun 2023 15:57:19 +0400 Subject: [PATCH 045/661] fs/ntfs3: Write immediately updated ntfs state Signed-off-by: Konstantin Komarov --- fs/ntfs3/fsntfs.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 33afee0f55593a..edb51dc12f65f6 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -983,18 +983,11 @@ int ntfs_set_state(struct ntfs_sb_info *sbi, enum NTFS_DIRTY_FLAGS dirty) if (err) return err; - mark_inode_dirty(&ni->vfs_inode); + mark_inode_dirty_sync(&ni->vfs_inode); /* verify(!ntfs_update_mftmirr()); */ - /* - * If we used wait=1, sync_inode_metadata waits for the io for the - * inode to finish. It hangs when media is removed. - * So wait=0 is sent down to sync_inode_metadata - * and filemap_fdatawrite is used for the data blocks. - */ - err = sync_inode_metadata(&ni->vfs_inode, 0); - if (!err) - err = filemap_fdatawrite(ni->vfs_inode.i_mapping); + /* write mft record on disk. */ + err = _ni_write_inode(&ni->vfs_inode, 1); return err; } From fc471e39e38fea6677017cbdd6d928088a59fc67 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Jun 2023 16:12:58 +0400 Subject: [PATCH 046/661] fs/ntfs3: Use kvmalloc instead of kmalloc(... __GFP_NOWARN) Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrlist.c | 15 +++++++++++++-- fs/ntfs3/bitmap.c | 3 ++- fs/ntfs3/super.c | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c index 42631b31adf17f..7c01735d1219d8 100644 --- a/fs/ntfs3/attrlist.c +++ b/fs/ntfs3/attrlist.c @@ -52,7 +52,8 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr) if (!attr->non_res) { lsize = le32_to_cpu(attr->res.data_size); - le = kmalloc(al_aligned(lsize), GFP_NOFS | __GFP_NOWARN); + /* attr is resident: lsize < record_size (1K or 4K) */ + le = kvmalloc(al_aligned(lsize), GFP_KERNEL); if (!le) { err = -ENOMEM; goto out; @@ -80,7 +81,17 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr) if (err < 0) goto out; - le = kmalloc(al_aligned(lsize), GFP_NOFS | __GFP_NOWARN); + /* attr is nonresident. + * The worst case: + * 1T (2^40) extremely fragmented file. + * cluster = 4K (2^12) => 2^28 fragments + * 2^9 fragments per one record => 2^19 records + * 2^5 bytes of ATTR_LIST_ENTRY per one record => 2^24 bytes. + * + * the result is 16M bytes per attribute list. + * Use kvmalloc to allocate in range [several Kbytes - dozen Mbytes] + */ + le = kvmalloc(al_aligned(lsize), GFP_KERNEL); if (!le) { err = -ENOMEM; goto out; diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c index 107e808e06eae0..d66055e30aff94 100644 --- a/fs/ntfs3/bitmap.c +++ b/fs/ntfs3/bitmap.c @@ -659,7 +659,8 @@ int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits) wnd->bits_last = wbits; wnd->free_bits = - kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS | __GFP_NOWARN); + kvmalloc_array(wnd->nwnd, sizeof(u16), GFP_KERNEL | __GFP_ZERO); + if (!wnd->free_bits) return -ENOMEM; diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index cfec5e0c7f66ae..ea4e218c3f7553 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -1367,7 +1367,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) } bytes = inode->i_size; - sbi->def_table = t = kmalloc(bytes, GFP_NOFS | __GFP_NOWARN); + sbi->def_table = t = kvmalloc(bytes, GFP_KERNEL); if (!t) { err = -ENOMEM; goto put_inode_out; From 013ff63b649475f0ee134e2c8d0c8e65284ede50 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Jun 2023 16:17:02 +0400 Subject: [PATCH 047/661] fs/ntfs3: Add more attributes checks in mi_enum_attr() Signed-off-by: Konstantin Komarov --- fs/ntfs3/record.c | 68 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index c12ebffc94da4c..02cc91ed88357c 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -193,8 +193,9 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) { const struct MFT_REC *rec = mi->mrec; u32 used = le32_to_cpu(rec->used); - u32 t32, off, asize; + u32 t32, off, asize, prev_type; u16 t16; + u64 data_size, alloc_size, tot_size; if (!attr) { u32 total = le32_to_cpu(rec->total); @@ -213,6 +214,7 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) if (!is_rec_inuse(rec)) return NULL; + prev_type = 0; attr = Add2Ptr(rec, off); } else { /* Check if input attr inside record. */ @@ -226,11 +228,11 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) return NULL; } - if (off + asize < off) { - /* Overflow check. */ + /* Overflow check. */ + if (off + asize < off) return NULL; - } + prev_type = le32_to_cpu(attr->type); attr = Add2Ptr(attr, asize); off += asize; } @@ -250,7 +252,11 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) /* 0x100 is last known attribute for now. */ t32 = le32_to_cpu(attr->type); - if ((t32 & 0xf) || (t32 > 0x100)) + if (!t32 || (t32 & 0xf) || (t32 > 0x100)) + return NULL; + + /* attributes in record must be ordered by type */ + if (t32 < prev_type) return NULL; /* Check overflow and boundary. */ @@ -259,16 +265,15 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) /* Check size of attribute. */ if (!attr->non_res) { + /* Check resident fields. */ if (asize < SIZEOF_RESIDENT) return NULL; t16 = le16_to_cpu(attr->res.data_off); - if (t16 > asize) return NULL; - t32 = le32_to_cpu(attr->res.data_size); - if (t16 + t32 > asize) + if (t16 + le32_to_cpu(attr->res.data_size) > asize) return NULL; t32 = sizeof(short) * attr->name_len; @@ -278,21 +283,52 @@ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) return attr; } - /* Check some nonresident fields. */ - if (attr->name_len && - le16_to_cpu(attr->name_off) + sizeof(short) * attr->name_len > - le16_to_cpu(attr->nres.run_off)) { + /* Check nonresident fields. */ + if (attr->non_res != 1) + return NULL; + + t16 = le16_to_cpu(attr->nres.run_off); + if (t16 > asize) + return NULL; + + t32 = sizeof(short) * attr->name_len; + if (t32 && le16_to_cpu(attr->name_off) + t32 > t16) + return NULL; + + /* Check start/end vcn. */ + if (le64_to_cpu(attr->nres.svcn) > le64_to_cpu(attr->nres.evcn) + 1) + return NULL; + + data_size = le64_to_cpu(attr->nres.data_size); + if (le64_to_cpu(attr->nres.valid_size) > data_size) return NULL; - } - if (attr->nres.svcn || !is_attr_ext(attr)) { + alloc_size = le64_to_cpu(attr->nres.alloc_size); + if (data_size > alloc_size) + return NULL; + + t32 = mi->sbi->cluster_mask; + if (alloc_size & t32) + return NULL; + + if (!attr->nres.svcn && is_attr_ext(attr)) { + /* First segment of sparse/compressed attribute */ + if (asize + 8 < SIZEOF_NONRESIDENT_EX) + return NULL; + + tot_size = le64_to_cpu(attr->nres.total_size); + if (tot_size & t32) + return NULL; + + if (tot_size > alloc_size) + return NULL; + } else { if (asize + 8 < SIZEOF_NONRESIDENT) return NULL; if (attr->nres.c_unit) return NULL; - } else if (asize + 8 < SIZEOF_NONRESIDENT_EX) - return NULL; + } return attr; } From bfbe5b31caa74ab97f1784fe9ade5f45e0d3de91 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Jun 2023 16:22:53 +0400 Subject: [PATCH 048/661] fs/ntfs3: fix deadlock in mark_as_free_ex Reported-by: syzbot+e94d98936a0ed08bde43@syzkaller.appspotmail.com Signed-off-by: Konstantin Komarov --- fs/ntfs3/fsntfs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index edb51dc12f65f6..fbfe21dbb42597 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -2454,10 +2454,12 @@ void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim) { CLST end, i, zone_len, zlen; struct wnd_bitmap *wnd = &sbi->used.bitmap; + bool dirty = false; down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); if (!wnd_is_used(wnd, lcn, len)) { - ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + /* mark volume as dirty out of wnd->rw_lock */ + dirty = true; end = lcn + len; len = 0; @@ -2511,6 +2513,8 @@ void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim) out: up_write(&wnd->rw_lock); + if (dirty) + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); } /* From 91a4b1ee78cb100b19b70f077c247f211110348f Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 30 Jun 2023 16:25:25 +0400 Subject: [PATCH 049/661] fs/ntfs3: Fix shift-out-of-bounds in ntfs_fill_super Reported-by: syzbot+478c1bf0e6bf4a8f3a04@syzkaller.appspotmail.com Signed-off-by: Konstantin Komarov --- fs/ntfs3/ntfs_fs.h | 2 ++ fs/ntfs3/super.c | 26 ++++++++++++++++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 629403ede6e5f1..788567d71d939a 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -42,9 +42,11 @@ enum utf16_endian; #define MINUS_ONE_T ((size_t)(-1)) /* Biggest MFT / smallest cluster */ #define MAXIMUM_BYTES_PER_MFT 4096 +#define MAXIMUM_SHIFT_BYTES_PER_MFT 12 #define NTFS_BLOCKS_PER_MFT_RECORD (MAXIMUM_BYTES_PER_MFT / 512) #define MAXIMUM_BYTES_PER_INDEX 4096 +#define MAXIMUM_SHIFT_BYTES_PER_INDEX 12 #define NTFS_BLOCKS_PER_INODE (MAXIMUM_BYTES_PER_INDEX / 512) /* NTFS specific error code when fixup failed. */ diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index ea4e218c3f7553..f78c67452b2a8c 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -899,9 +899,17 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, goto out; } - sbi->record_size = record_size = - boot->record_size < 0 ? 1 << (-boot->record_size) : - (u32)boot->record_size << cluster_bits; + if (boot->record_size >= 0) { + record_size = (u32)boot->record_size << cluster_bits; + } else if (-boot->record_size <= MAXIMUM_SHIFT_BYTES_PER_MFT) { + record_size = 1u << (-boot->record_size); + } else { + ntfs_err(sb, "%s: invalid record size %d.", hint, + boot->record_size); + goto out; + } + + sbi->record_size = record_size; sbi->record_bits = blksize_bits(record_size); sbi->attr_size_tr = (5 * record_size >> 4); // ~320 bytes @@ -918,9 +926,15 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, goto out; } - sbi->index_size = boot->index_size < 0 ? - 1u << (-boot->index_size) : - (u32)boot->index_size << cluster_bits; + if (boot->index_size >= 0) { + sbi->index_size = (u32)boot->index_size << cluster_bits; + } else if (-boot->index_size <= MAXIMUM_SHIFT_BYTES_PER_INDEX) { + sbi->index_size = 1u << (-boot->index_size); + } else { + ntfs_err(sb, "%s: invalid index size %d.", hint, + boot->index_size); + goto out; + } /* Check index record size. */ if (sbi->index_size < SECTOR_SIZE || !is_power_of_2(sbi->index_size)) { From 3f2f09f18972dc4548feebd9946c10c04351333d Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 22 Sep 2023 13:01:04 +0300 Subject: [PATCH 050/661] fs/ntfs3: Use inode_set_ctime_to_ts instead of inode_set_ctime Signed-off-by: Konstantin Komarov --- fs/ntfs3/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index eb2ed0701495f1..2f76dc055c1f35 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -170,8 +170,8 @@ static struct inode *ntfs_read_mft(struct inode *inode, nt2kernel(std5->cr_time, &ni->i_crtime); #endif nt2kernel(std5->a_time, &inode->i_atime); - ctime = inode_get_ctime(inode); nt2kernel(std5->c_time, &ctime); + inode_set_ctime_to_ts(inode, ctime); nt2kernel(std5->m_time, &inode->i_mtime); ni->std_fa = std5->fa; From 4ad5c924df6cd6d85708fa23f9d9a2b78a2e428e Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 22 Sep 2023 13:07:59 +0300 Subject: [PATCH 051/661] fs/ntfs3: Allow repeated call to ntfs3_put_sbi Signed-off-by: Konstantin Komarov --- fs/ntfs3/bitmap.c | 1 + fs/ntfs3/super.c | 21 ++++++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c index d66055e30aff94..63f14a0232f6a0 100644 --- a/fs/ntfs3/bitmap.c +++ b/fs/ntfs3/bitmap.c @@ -125,6 +125,7 @@ void wnd_close(struct wnd_bitmap *wnd) struct rb_node *node, *next; kfree(wnd->free_bits); + wnd->free_bits = NULL; run_close(&wnd->run); node = rb_first(&wnd->start_tree); diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index f78c67452b2a8c..71c80c578feb6e 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -576,20 +576,30 @@ static noinline void ntfs3_put_sbi(struct ntfs_sb_info *sbi) wnd_close(&sbi->mft.bitmap); wnd_close(&sbi->used.bitmap); - if (sbi->mft.ni) + if (sbi->mft.ni) { iput(&sbi->mft.ni->vfs_inode); + sbi->mft.ni = NULL; + } - if (sbi->security.ni) + if (sbi->security.ni) { iput(&sbi->security.ni->vfs_inode); + sbi->security.ni = NULL; + } - if (sbi->reparse.ni) + if (sbi->reparse.ni) { iput(&sbi->reparse.ni->vfs_inode); + sbi->reparse.ni = NULL; + } - if (sbi->objid.ni) + if (sbi->objid.ni) { iput(&sbi->objid.ni->vfs_inode); + sbi->objid.ni = NULL; + } - if (sbi->volume.ni) + if (sbi->volume.ni) { iput(&sbi->volume.ni->vfs_inode); + sbi->volume.ni = NULL; + } ntfs_update_mftmirr(sbi, 0); @@ -1577,6 +1587,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) iput(inode); out: kfree(boot2); + ntfs3_put_sbi(sbi); return err; } From dcc852e509a4cba0ac6ac734077cef260e4e0fe6 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Fri, 22 Sep 2023 13:12:11 +0300 Subject: [PATCH 052/661] fs/ntfs3: Fix alternative boot searching Signed-off-by: Konstantin Komarov --- fs/ntfs3/super.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 71c80c578feb6e..d2951b23f52a50 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -846,7 +846,7 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, struct ntfs_sb_info *sbi = sb->s_fs_info; int err; u32 mb, gb, boot_sector_size, sct_per_clst, record_size; - u64 sectors, clusters, mlcn, mlcn2; + u64 sectors, clusters, mlcn, mlcn2, dev_size0; struct NTFS_BOOT *boot; struct buffer_head *bh; struct MFT_REC *rec; @@ -855,6 +855,9 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, u32 boot_off = 0; const char *hint = "Primary boot"; + /* Save original dev_size. Used with alternative boot. */ + dev_size0 = dev_size; + sbi->volume.blocks = dev_size >> PAGE_SHIFT; bh = ntfs_bread(sb, 0); @@ -1087,9 +1090,9 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, } out: - if (err == -EINVAL && !bh->b_blocknr && dev_size > PAGE_SHIFT) { + if (err == -EINVAL && !bh->b_blocknr && dev_size0 > PAGE_SHIFT) { u32 block_size = min_t(u32, sector_size, PAGE_SIZE); - u64 lbo = dev_size - sizeof(*boot); + u64 lbo = dev_size0 - sizeof(*boot); /* * Try alternative boot (last sector) @@ -1103,6 +1106,7 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, boot_off = lbo & (block_size - 1); hint = "Alternative boot"; + dev_size = dev_size0; /* restore original size. */ goto check_boot; } brelse(bh); From f684073c09fd4658faa0a75c12b65d89a58b8e83 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 25 Sep 2023 10:47:07 +0300 Subject: [PATCH 053/661] fs/ntfs3: Refactoring and comments Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrib.c | 6 +++--- fs/ntfs3/file.c | 4 ++-- fs/ntfs3/inode.c | 3 ++- fs/ntfs3/namei.c | 6 +++--- fs/ntfs3/ntfs.h | 2 +- fs/ntfs3/ntfs_fs.h | 2 -- fs/ntfs3/record.c | 6 ++++++ fs/ntfs3/super.c | 19 ++++++++----------- 8 files changed, 25 insertions(+), 23 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index a9d82bbb4729ec..e16487764282ee 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -1106,10 +1106,10 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, } } - /* + /* * The code below may require additional cluster (to extend attribute list) - * and / or one MFT record - * It is too complex to undo operations if -ENOSPC occurs deep inside + * and / or one MFT record + * It is too complex to undo operations if -ENOSPC occurs deep inside * in 'ni_insert_nonresident'. * Return in advance -ENOSPC here if there are no free cluster and no free MFT. */ diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 962f12ce6c0a62..1f7a194983c5db 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -745,8 +745,8 @@ static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) } static ssize_t ntfs_file_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, - size_t len, unsigned int flags) + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) { struct inode *inode = in->f_mapping->host; struct ntfs_inode *ni = ntfs_i(inode); diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 2f76dc055c1f35..d6d021e19aaa23 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -1660,7 +1660,8 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, d_instantiate(dentry, inode); /* Set original time. inode times (i_ctime) may be changed in ntfs_init_acl. */ - inode->i_atime = inode->i_mtime = inode_set_ctime_to_ts(inode, ni->i_crtime); + inode->i_atime = inode->i_mtime = + inode_set_ctime_to_ts(inode, ni->i_crtime); dir->i_mtime = inode_set_ctime_to_ts(dir, ni->i_crtime); mark_inode_dirty(dir); diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c index ad430d50bd7983..eedacf94edd805 100644 --- a/fs/ntfs3/namei.c +++ b/fs/ntfs3/namei.c @@ -156,8 +156,8 @@ static int ntfs_link(struct dentry *ode, struct inode *dir, struct dentry *de) err = ntfs_link_inode(inode, de); if (!err) { - dir->i_mtime = inode_set_ctime_to_ts(inode, - inode_set_ctime_current(dir)); + dir->i_mtime = inode_set_ctime_to_ts( + inode, inode_set_ctime_current(dir)); mark_inode_dirty(inode); mark_inode_dirty(dir); d_instantiate(de, inode); @@ -373,7 +373,7 @@ static int ntfs_atomic_open(struct inode *dir, struct dentry *dentry, #ifdef CONFIG_NTFS3_FS_POSIX_ACL if (IS_POSIXACL(dir)) { - /* + /* * Load in cache current acl to avoid ni_lock(dir): * ntfs_create_inode -> ntfs_init_acl -> posix_acl_create -> * ntfs_get_acl -> ntfs_get_acl_ex -> ni_lock diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 98b76d1b09e787..86aecbb01a92f2 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -847,7 +847,7 @@ struct OBJECT_ID { // Birth Volume Id is the Object Id of the Volume on. // which the Object Id was allocated. It never changes. struct GUID BirthVolumeId; //0x10: - + // Birth Object Id is the first Object Id that was // ever assigned to this MFT Record. I.e. If the Object Id // is changed for some reason, this field will reflect the diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 788567d71d939a..0e6a2777870c3e 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -497,8 +497,6 @@ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, u32 flags); int ntfs3_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); -void ntfs_sparse_cluster(struct inode *inode, struct page *page0, CLST vcn, - CLST len); int ntfs_file_open(struct inode *inode, struct file *file); int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index 02cc91ed88357c..53629b1f65e995 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -189,6 +189,12 @@ int mi_read(struct mft_inode *mi, bool is_mft) return err; } +/* + * mi_enum_attr - start/continue attributes enumeration in record. + * + * NOTE: mi->mrec - memory of size sbi->record_size + * here we sure that mi->mrec->total == sbi->record_size (see mi_read) + */ struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) { const struct MFT_REC *rec = mi->mrec; diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index d2951b23f52a50..f9a2143671135b 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -488,7 +488,6 @@ static ssize_t ntfs3_label_write(struct file *file, const char __user *buffer, { int err; struct super_block *sb = pde_data(file_inode(file)); - struct ntfs_sb_info *sbi = sb->s_fs_info; ssize_t ret = count; u8 *label = kmalloc(count, GFP_NOFS); @@ -502,7 +501,7 @@ static ssize_t ntfs3_label_write(struct file *file, const char __user *buffer, while (ret > 0 && label[ret - 1] == '\n') ret -= 1; - err = ntfs_set_label(sbi, label, ret); + err = ntfs_set_label(sb->s_fs_info, label, ret); if (err < 0) { ntfs_err(sb, "failed (%d) to write label", err); @@ -1082,10 +1081,10 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, if (bh->b_blocknr && !sb_rdonly(sb)) { /* - * Alternative boot is ok but primary is not ok. - * Do not update primary boot here 'cause it may be faked boot. - * Let ntfs to be mounted and update boot later. - */ + * Alternative boot is ok but primary is not ok. + * Do not update primary boot here 'cause it may be faked boot. + * Let ntfs to be mounted and update boot later. + */ *boot2 = kmemdup(boot, sizeof(*boot), GFP_NOFS | __GFP_NOWARN); } @@ -1549,9 +1548,9 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) if (boot2) { /* - * Alternative boot is ok but primary is not ok. - * Volume is recognized as NTFS. Update primary boot. - */ + * Alternative boot is ok but primary is not ok. + * Volume is recognized as NTFS. Update primary boot. + */ struct buffer_head *bh0 = sb_getblk(sb, 0); if (bh0) { if (buffer_locked(bh0)) @@ -1785,7 +1784,6 @@ static int __init init_ntfs_fs(void) if (IS_ENABLED(CONFIG_NTFS3_LZX_XPRESS)) pr_info("ntfs3: Read-only LZX/Xpress compression included\n"); - #ifdef CONFIG_PROC_FS /* Create "/proc/fs/ntfs3" */ proc_info_root = proc_mkdir("fs/ntfs3", NULL); @@ -1827,7 +1825,6 @@ static void __exit exit_ntfs_fs(void) if (proc_info_root) remove_proc_entry("fs/ntfs3", NULL); #endif - } MODULE_LICENSE("GPL"); From d27e202b9ac416e52093edf8789614d93dbd6231 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 25 Sep 2023 10:54:07 +0300 Subject: [PATCH 054/661] fs/ntfs3: Add more info into /proc/fs/ntfs3//volinfo Signed-off-by: Konstantin Komarov --- fs/ntfs3/super.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index f9a2143671135b..5811da7e9d458e 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -453,15 +453,23 @@ static struct proc_dir_entry *proc_info_root; * ntfs3.1 * cluster size * number of clusters + * total number of mft records + * number of used mft records ~= number of files + folders + * real state of ntfs "dirty"/"clean" + * current state of ntfs "dirty"/"clean" */ static int ntfs3_volinfo(struct seq_file *m, void *o) { struct super_block *sb = m->private; struct ntfs_sb_info *sbi = sb->s_fs_info; - seq_printf(m, "ntfs%d.%d\n%u\n%zu\n", sbi->volume.major_ver, - sbi->volume.minor_ver, sbi->cluster_size, - sbi->used.bitmap.nbits); + seq_printf(m, "ntfs%d.%d\n%u\n%zu\n\%zu\n%zu\n%s\n%s\n", + sbi->volume.major_ver, sbi->volume.minor_ver, + sbi->cluster_size, sbi->used.bitmap.nbits, + sbi->mft.bitmap.nbits, + sbi->mft.bitmap.nbits - wnd_zeroes(&sbi->mft.bitmap), + sbi->volume.real_dirty ? "dirty" : "clean", + (sbi->volume.flags & VOLUME_FLAG_DIRTY) ? "dirty" : "clean"); return 0; } From e52dce610a2d53bf2b5e94a8843c71cb73a91ea5 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 25 Sep 2023 10:56:15 +0300 Subject: [PATCH 055/661] fs/ntfs3: Do not allow to change label if volume is read-only Signed-off-by: Konstantin Komarov --- fs/ntfs3/super.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 5811da7e9d458e..cf0a720523f058 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -497,7 +497,12 @@ static ssize_t ntfs3_label_write(struct file *file, const char __user *buffer, int err; struct super_block *sb = pde_data(file_inode(file)); ssize_t ret = count; - u8 *label = kmalloc(count, GFP_NOFS); + u8 *label; + + if (sb_rdonly(sb)) + return -EROFS; + + label = kmalloc(count, GFP_NOFS); if (!label) return -ENOMEM; From 32e9212256b88f35466642f9c939bb40cfb2c2de Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Tue, 26 Sep 2023 11:19:08 +0300 Subject: [PATCH 056/661] fs/ntfs3: Fix possible NULL-ptr-deref in ni_readpage_cmpr() Signed-off-by: Konstantin Komarov --- fs/ntfs3/frecord.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index d49fbb22bd5e8f..dad976a6898596 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -2148,7 +2148,7 @@ int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page) for (i = 0; i < pages_per_frame; i++) { pg = pages[i]; - if (i == idx) + if (i == idx || !pg) continue; unlock_page(pg); put_page(pg); From 9c689c8dc86f8ca99bf91c05f24c8bab38fe7d5f Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Tue, 26 Sep 2023 11:28:11 +0300 Subject: [PATCH 057/661] fs/ntfs3: Fix NULL pointer dereference on error in attr_allocate_frame() Signed-off-by: Konstantin Komarov --- fs/ntfs3/attrib.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index e16487764282ee..63f70259edc0d4 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -1736,10 +1736,8 @@ int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size, le_b = NULL; attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b); - if (!attr_b) { - err = -ENOENT; - goto out; - } + if (!attr_b) + return -ENOENT; attr = attr_b; le = le_b; From 8e7e27b2ee1e19c4040d4987e345f678a74c0aed Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Thu, 20 Apr 2023 15:46:22 +0800 Subject: [PATCH 058/661] fs/ntfs3: fix panic about slab-out-of-bounds caused by ntfs_list_ea() Here is a BUG report about linux-6.1 from syzbot, but it still remains within upstream: BUG: KASAN: slab-out-of-bounds in ntfs_list_ea fs/ntfs3/xattr.c:191 [inline] BUG: KASAN: slab-out-of-bounds in ntfs_listxattr+0x401/0x570 fs/ntfs3/xattr.c:710 Read of size 1 at addr ffff888021acaf3d by task syz-executor128/3632 Call Trace: kasan_report+0x139/0x170 mm/kasan/report.c:495 ntfs_list_ea fs/ntfs3/xattr.c:191 [inline] ntfs_listxattr+0x401/0x570 fs/ntfs3/xattr.c:710 vfs_listxattr fs/xattr.c:457 [inline] listxattr+0x293/0x2d0 fs/xattr.c:804 path_listxattr fs/xattr.c:828 [inline] __do_sys_llistxattr fs/xattr.c:846 [inline] Before derefering field members of `ea` in unpacked_ea_size(), we need to check whether the EA_FULL struct is located in access validate range. Similarly, when derefering `ea->name` field member, we need to check whethe the ea->name is located in access validate range, too. Fixes: be71b5cba2e6 ("fs/ntfs3: Add attrib operations") Reported-by: syzbot+9fcea5ef6dc4dc72d334@syzkaller.appspotmail.com Signed-off-by: Zeng Heng [almaz.alexandrovich@paragon-software.com: took the ret variable out of the loop block] Signed-off-by: Konstantin Komarov --- fs/ntfs3/xattr.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 29fd391899e59d..4920548192a0cf 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -211,7 +211,8 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, size = le32_to_cpu(info->size); /* Enumerate all xattrs. */ - for (ret = 0, off = 0; off < size; off += ea_size) { + ret = 0; + for (off = 0; off + sizeof(struct EA_FULL) < size; off += ea_size) { ea = Add2Ptr(ea_all, off); ea_size = unpacked_ea_size(ea); @@ -219,6 +220,10 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, break; if (buffer) { + /* Check if we can use field ea->name */ + if (off + ea_size > size) + break; + if (ret + ea->name_len + 1 > bytes_per_buffer) { err = -ERANGE; goto out; From 34e6552a442f268eefd408e47f4f2d471aa64829 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 13 Jul 2023 22:41:46 +0300 Subject: [PATCH 059/661] fs/ntfs3: Fix OOB read in ntfs_init_from_boot Syzbot was able to create a device which has the last sector of size 512. After failing to boot from initial sector, reading from boot info from offset 511 causes OOB read. To prevent such reports add sanity check to validate if size of buffer_head if big enough to hold ntfs3 bootinfo Fixes: 6a4cd3ea7d77 ("fs/ntfs3: Alternative boot if primary boot is corrupted") Reported-by: syzbot+53ce40c8c0322c06aea5@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Signed-off-by: Konstantin Komarov --- fs/ntfs3/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index cf0a720523f058..5341ed80e2d2a1 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -878,6 +878,11 @@ static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, check_boot: err = -EINVAL; + + /* Corrupted image; do not read OOB */ + if (bh->b_size - sizeof(*boot) < boot_off) + goto out; + boot = (struct NTFS_BOOT *)Add2Ptr(bh->b_data, boot_off); if (memcmp(boot->system_id, "NTFS ", sizeof("NTFS ") - 1)) { From 1f9b94af923c88539426ed811ae7e9543834a5c5 Mon Sep 17 00:00:00 2001 From: Ziqi Zhao Date: Wed, 9 Aug 2023 12:11:18 -0700 Subject: [PATCH 060/661] fs/ntfs3: Fix possible null-pointer dereference in hdr_find_e() Upon investigation of the C reproducer provided by Syzbot, it seemed the reproducer was trying to mount a corrupted NTFS filesystem, then issue a rename syscall to some nodes in the filesystem. This can be shown by modifying the reproducer to only include the mount syscall, and investigating the filesystem by e.g. `ls` and `rm` commands. As a result, during the problematic call to `hdr_fine_e`, the `inode` being supplied did not go through `indx_init`, hence the `cmp` function pointer was never set. The fix is simply to check whether `cmp` is not set, and return NULL if that's the case, in order to be consistent with other error scenarios of the `hdr_find_e` method. The rationale behind this patch is that: - We should prevent crashing the kernel even if the mounted filesystem is corrupted. Any syscalls made on the filesystem could return invalid, but the kernel should be able to sustain these calls. - Only very specific corruption would lead to this bug, so it would be a pretty rare case in actual usage anyways. Therefore, introducing a check to specifically protect against this bug seems appropriate. Because of its rarity, an `unlikely` clause is used to wrap around this nullity check. Reported-by: syzbot+60cf892fc31d1f4358fc@syzkaller.appspotmail.com Signed-off-by: Ziqi Zhao Signed-off-by: Konstantin Komarov --- fs/ntfs3/index.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index 124c6e822623fe..cf92b2433f7a75 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -729,6 +729,9 @@ static struct NTFS_DE *hdr_find_e(const struct ntfs_index *indx, u32 total = le32_to_cpu(hdr->total); u16 offs[128]; + if (unlikely(!cmp)) + return NULL; + fill_table: if (end > total) return NULL; From 85a4780dc96ed9dd643bbadf236552b3320fae26 Mon Sep 17 00:00:00 2001 From: Gabriel Marcano Date: Tue, 12 Sep 2023 21:50:32 -0700 Subject: [PATCH 061/661] fs/ntfs3: Fix directory element type detection Calling stat() from userspace correctly identified junctions in an NTFS partition as symlinks, but using readdir() and iterating through the directory containing the same junction did not identify the junction as a symlink. When emitting directory contents, check FILE_ATTRIBUTE_REPARSE_POINT attribute to detect junctions and report them as links. Signed-off-by: Gabriel Marcano Signed-off-by: Konstantin Komarov --- fs/ntfs3/dir.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index 063a6654199bc0..ec0566b322d5d0 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -309,7 +309,11 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, return 0; } - dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; + /* NTFS: symlinks are "dir + reparse" or "file + reparse" */ + if (fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) + dt_type = DT_LNK; + else + dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type); } From e4494770a5cad3c9d1d2a65ed15d07656c0d9b82 Mon Sep 17 00:00:00 2001 From: Su Hui Date: Mon, 25 Sep 2023 12:48:07 +0800 Subject: [PATCH 062/661] fs/ntfs3: Avoid possible memory leak smatch warn: fs/ntfs3/fslog.c:2172 last_log_lsn() warn: possible memory leak of 'page_bufs' Jump to label 'out' to free 'page_bufs' and is more consistent with other code. Signed-off-by: Su Hui Signed-off-by: Konstantin Komarov --- fs/ntfs3/fslog.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 12f28cdf5c838b..98ccb665085831 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -2168,8 +2168,10 @@ static int last_log_lsn(struct ntfs_log *log) if (!page) { page = kmalloc(log->page_size, GFP_NOFS); - if (!page) - return -ENOMEM; + if (!page) { + err = -ENOMEM; + goto out; + } } /* From 089667aaaa6aa33715d22b21a72216b43af3e896 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 15 Aug 2023 17:28:13 +0200 Subject: [PATCH 063/661] phy: realtek: Realtek PHYs should depend on ARCH_REALTEK The Realtek SoC USB2 and USB3 PHY Transceivers are only present on Realtek Digital Home Center (DHC) RTD series SoCs. Hence add a dependency on ARCH_REALTEK, to prevent asking the user about these drivers when configuring a kernel without Realtek SoC support. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/2892527cac9af6fa8f5e7b8daeffd7d4351fde68.1692113167.git.geert+renesas@glider.be Signed-off-by: Vinod Koul --- drivers/phy/realtek/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/phy/realtek/Kconfig b/drivers/phy/realtek/Kconfig index 650e20ed69af41..75ac7e7c31aec6 100644 --- a/drivers/phy/realtek/Kconfig +++ b/drivers/phy/realtek/Kconfig @@ -2,6 +2,9 @@ # # Phy drivers for Realtek platforms # + +if ARCH_REALTEK || COMPILE_TEST + config PHY_RTK_RTD_USB2PHY tristate "Realtek RTD USB2 PHY Transceiver Driver" depends on USB_SUPPORT @@ -25,3 +28,5 @@ config PHY_RTK_RTD_USB3PHY The DHC (digital home center) RTD series SoCs used the Synopsys DWC3 USB IP. This driver will do the PHY initialization of the parameters. + +endif # ARCH_REALTEK || COMPILE_TEST From 7a48b58eb5ff3798f0480d2da16bf27df9654fc7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 28 Sep 2023 10:16:05 +0300 Subject: [PATCH 064/661] perf dlfilter: Fix use of addr_location__exit() in dlfilter__object_code() Stop calling addr_location__exit() when addr_location__init() was not called. Fixes: 0dd5041c9a0e ("perf addr_location: Add init/exit/copy functions") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20230928071605.17624-1-adrian.hunter@intel.com Signed-off-by: Namhyung Kim --- tools/perf/util/dlfilter.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c index 1dbf27822ee28f..4a1dc21b0450bc 100644 --- a/tools/perf/util/dlfilter.c +++ b/tools/perf/util/dlfilter.c @@ -282,13 +282,21 @@ static struct perf_event_attr *dlfilter__attr(void *ctx) return &d->evsel->core.attr; } +static __s32 code_read(__u64 ip, struct map *map, struct machine *machine, void *buf, __u32 len) +{ + u64 offset = map__map_ip(map, ip); + + if (ip + len >= map__end(map)) + len = map__end(map) - ip; + + return dso__data_read_offset(map__dso(map), machine, offset, buf, len); +} + static __s32 dlfilter__object_code(void *ctx, __u64 ip, void *buf, __u32 len) { struct dlfilter *d = (struct dlfilter *)ctx; struct addr_location *al; struct addr_location a; - struct map *map; - u64 offset; __s32 ret; if (!d->ctx_valid) @@ -298,27 +306,17 @@ static __s32 dlfilter__object_code(void *ctx, __u64 ip, void *buf, __u32 len) if (!al) return -1; - map = al->map; - - if (map && ip >= map__start(map) && ip < map__end(map) && + if (al->map && ip >= map__start(al->map) && ip < map__end(al->map) && machine__kernel_ip(d->machine, ip) == machine__kernel_ip(d->machine, d->sample->ip)) - goto have_map; + return code_read(ip, al->map, d->machine, buf, len); addr_location__init(&a); + thread__find_map_fb(al->thread, d->sample->cpumode, ip, &a); - if (!a.map) { - ret = -1; - goto out; - } + ret = a.map ? code_read(ip, a.map, d->machine, buf, len) : -1; - map = a.map; -have_map: - offset = map__map_ip(map, ip); - if (ip + len >= map__end(map)) - len = map__end(map) - ip; - ret = dso__data_read_offset(map__dso(map), d->machine, offset, buf, len); -out: addr_location__exit(&a); + return ret; } From f38f547314b858b94d36aeb9a4401f0aade7d1af Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 28 Sep 2023 12:10:33 +0300 Subject: [PATCH 065/661] perf dlfilter: Add a test for object_code() Extend the "dlfilter C API" test to test perf_dlfilter_fns.object_code(). Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20230928091033.33998-1-adrian.hunter@intel.com Signed-off-by: Namhyung Kim --- tools/perf/dlfilters/dlfilter-test-api-v0.c | 12 +++++++++++- tools/perf/dlfilters/dlfilter-test-api-v2.c | 12 +++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/tools/perf/dlfilters/dlfilter-test-api-v0.c b/tools/perf/dlfilters/dlfilter-test-api-v0.c index 72f263d491211f..4083b1abeaabe6 100644 --- a/tools/perf/dlfilters/dlfilter-test-api-v0.c +++ b/tools/perf/dlfilters/dlfilter-test-api-v0.c @@ -289,6 +289,15 @@ static int check_attr(void *ctx) return 0; } +static int check_object_code(void *ctx, const struct perf_dlfilter_sample *sample) +{ + __u8 buf[15]; + + CHECK(perf_dlfilter_fns.object_code(ctx, sample->ip, buf, sizeof(buf)) > 0); + + return 0; +} + static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void *ctx, bool early) { struct filter_data *d = data; @@ -314,7 +323,8 @@ static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void if (early && !d->do_early) return 0; - if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample)) + if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample) || + check_object_code(ctx, sample)) return -1; if (early) diff --git a/tools/perf/dlfilters/dlfilter-test-api-v2.c b/tools/perf/dlfilters/dlfilter-test-api-v2.c index 38e593d92920c7..32ff619e881caa 100644 --- a/tools/perf/dlfilters/dlfilter-test-api-v2.c +++ b/tools/perf/dlfilters/dlfilter-test-api-v2.c @@ -308,6 +308,15 @@ static int check_attr(void *ctx) return 0; } +static int check_object_code(void *ctx, const struct perf_dlfilter_sample *sample) +{ + __u8 buf[15]; + + CHECK(perf_dlfilter_fns.object_code(ctx, sample->ip, buf, sizeof(buf)) > 0); + + return 0; +} + static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void *ctx, bool early) { struct filter_data *d = data; @@ -333,7 +342,8 @@ static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void if (early && !d->do_early) return 0; - if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample)) + if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample) || + check_object_code(ctx, sample)) return -1; if (early) From 2ec8b010979036c2fe79a64adb6ecc0bd11e91d1 Mon Sep 17 00:00:00 2001 From: "William A. Kennington III" Date: Fri, 22 Sep 2023 11:28:12 -0700 Subject: [PATCH 066/661] spi: npcm-fiu: Fix UMA reads when dummy.nbytes == 0 We don't want to use the value of ilog2(0) as dummy.buswidth is 0 when dummy.nbytes is 0. Since we have no dummy bytes, we don't need to configure the dummy byte bits per clock register value anyway. Signed-off-by: "William A. Kennington III" Link: https://lore.kernel.org/r/20230922182812.2728066-1-william@wkennington.com Signed-off-by: Mark Brown --- drivers/spi/spi-npcm-fiu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-npcm-fiu.c b/drivers/spi/spi-npcm-fiu.c index 0ca21ff0e9cc12..e422485196884f 100644 --- a/drivers/spi/spi-npcm-fiu.c +++ b/drivers/spi/spi-npcm-fiu.c @@ -353,8 +353,9 @@ static int npcm_fiu_uma_read(struct spi_mem *mem, uma_cfg |= ilog2(op->cmd.buswidth); uma_cfg |= ilog2(op->addr.buswidth) << NPCM_FIU_UMA_CFG_ADBPCK_SHIFT; - uma_cfg |= ilog2(op->dummy.buswidth) - << NPCM_FIU_UMA_CFG_DBPCK_SHIFT; + if (op->dummy.nbytes) + uma_cfg |= ilog2(op->dummy.buswidth) + << NPCM_FIU_UMA_CFG_DBPCK_SHIFT; uma_cfg |= ilog2(op->data.buswidth) << NPCM_FIU_UMA_CFG_RDBPCK_SHIFT; uma_cfg |= op->dummy.nbytes << NPCM_FIU_UMA_CFG_DBSIZ_SHIFT; From b15e7490a1efd4c0f54434c3a85ced1b6b536b7a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 21 Sep 2023 10:16:41 -0700 Subject: [PATCH 067/661] KVM: selftests: Treat %llx like %lx when formatting guest printf Treat %ll* formats the same as %l* formats when processing printfs from the guest so that using e.g. %llx instead of %lx generates the expected output. Ideally, unexpected formats would generate compile-time warnings or errors, but it's not at all obvious how to actually accomplish that. Alternatively, guest_vsnprintf() could assert on an unexpected format, but since the vast majority of printfs are for failed guest asserts, getting *something* printed is better than nothing. E.g. before ==== Test Assertion Failure ==== x86_64/private_mem_conversions_test.c:265: mem[i] == 0 pid=4286 tid=4290 errno=4 - Interrupted system call 1 0x0000000000401c74: __test_mem_conversions at private_mem_conversions_test.c:336 2 0x00007f3aae6076da: ?? ??:0 3 0x00007f3aae32161e: ?? ??:0 Expected 0x0 at offset 0 (gpa 0x%lx), got 0x0 and after ==== Test Assertion Failure ==== x86_64/private_mem_conversions_test.c:265: mem[i] == 0 pid=5664 tid=5668 errno=4 - Interrupted system call 1 0x0000000000401c74: __test_mem_conversions at private_mem_conversions_test.c:336 2 0x00007fbe180076da: ?? ??:0 3 0x00007fbe17d2161e: ?? ??:0 Expected 0x0 at offset 0 (gpa 0x100000000), got 0xcc Fixes: e5119382499c ("KVM: selftests: Add guest_snprintf() to KVM selftests") Cc: Aaron Lewis Link: https://lore.kernel.org/r/20230921171641.3641776-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/guest_sprintf.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c index c4a69d8aeb68cc..74627514c4d44d 100644 --- a/tools/testing/selftests/kvm/lib/guest_sprintf.c +++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c @@ -200,6 +200,13 @@ int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args) ++fmt; } + /* + * Play nice with %llu, %llx, etc. KVM selftests only support + * 64-bit builds, so just treat %ll* the same as %l*. + */ + if (qualifier == 'l' && *fmt == 'l') + ++fmt; + /* default base */ base = 10; From 332c4d90a09c2cdc59f88d6a6c58c1601403b891 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Thu, 14 Sep 2023 17:48:03 +0800 Subject: [PATCH 068/661] KVM: selftests: Remove obsolete and incorrect test case metadata Delete inaccurate descriptions and obsolete metadata for test cases. It adds zero value, and has a non-zero chance of becoming stale and misleading in the future. No functional changes intended. Suggested-by: Sean Christopherson Signed-off-by: Like Xu Link: https://lore.kernel.org/r/20230914094803.94661-1-likexu@tencent.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/ucall_common.h | 2 -- tools/testing/selftests/kvm/lib/x86_64/apic.c | 2 -- tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c | 2 -- tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c | 2 -- tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh | 1 - tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c | 4 ---- tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c | 4 ---- 7 files changed, 17 deletions(-) diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index 112bc1da732a44..ce33d306c2cba7 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/kvm_util.h - * * Copyright (C) 2018, Google LLC. */ #ifndef SELFTEST_KVM_UCALL_COMMON_H diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86_64/apic.c index 7168e25c194e1d..89153a333e83c2 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/apic.c +++ b/tools/testing/selftests/kvm/lib/x86_64/apic.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/processor.c - * * Copyright (C) 2021, Google LLC. */ diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index e446d76d1c0c38..6c127856209065 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * KVM_GET/SET_* tests - * * Copyright (C) 2022, Red Hat, Inc. * * Tests for Hyper-V extensions to SVM. diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 7f36c32fa76093..18ac5c1952a3a3 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/nx_huge_page_test.c - * * Usage: to be run via nx_huge_page_test.sh, which does the necessary * environment setup and teardown * diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh index 0560149e66ed3b..7cbb409801eea7 100755 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh @@ -4,7 +4,6 @@ # Wrapper script which performs setup and cleanup for nx_huge_pages_test. # Makes use of root privileges to set up huge pages and KVM module parameters. # -# tools/testing/selftests/kvm/nx_huge_page_test.sh # Copyright (C) 2022, Google LLC. set -e diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c index 5b669818e39aaa..59c7304f805efd 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c +++ b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c @@ -1,10 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * svm_vmcall_test - * * Copyright © 2021 Amazon.com, Inc. or its affiliates. - * - * Xen shared_info / pvclock testing */ #include "test_util.h" diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 05898ad9f4d992..9ec9ab60b63ee2 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -1,10 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * svm_vmcall_test - * * Copyright © 2021 Amazon.com, Inc. or its affiliates. - * - * Xen shared_info / pvclock testing */ #include "test_util.h" From 84fa1865edbb3800f3344e2a5bc73c187adf42d0 Mon Sep 17 00:00:00 2001 From: Ermin Sunj Date: Thu, 7 Sep 2023 17:17:15 +0200 Subject: [PATCH 069/661] arm64: dts: rockchip: use codec as clock master on px30-ringneck-haikou MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the codec is not the clock master, the MCLK needs to be synchronous to both I2S_SCL ans I2S_LRCLK. We do not have that on Haikou, causing distorted audio. Before: Running an audio test script on Ringneck, 1kHz output sine wave is not stable and shows distortion. After: 10h audio test script loop failed only one time. That is 0.00014% failure rate. Cc: stable@vger.kernel.org Fixes: c484cf93f61b ("arm64: dts: rockchip: add PX30-µQ7 (Ringneck) SoM with Haikou baseboard") Signed-off-by: Ermin Sunj Signed-off-by: Jakob Unterwurzacher Link: https://lore.kernel.org/r/20230907151725.198347-1-jakob.unterwurzacher@theobroma-systems.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts index 08a3ad3e7ae922..8792fae50257ad 100644 --- a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts @@ -68,15 +68,15 @@ simple-audio-card,format = "i2s"; simple-audio-card,name = "Haikou,I2S-codec"; simple-audio-card,mclk-fs = <512>; + simple-audio-card,frame-master = <&sgtl5000_codec>; + simple-audio-card,bitclock-master = <&sgtl5000_codec>; - simple-audio-card,codec { + sgtl5000_codec: simple-audio-card,codec { clocks = <&sgtl5000_clk>; sound-dai = <&sgtl5000>; }; simple-audio-card,cpu { - bitclock-master; - frame-master; sound-dai = <&i2s0_8ch>; }; }; From 1e585cd0aad3d491938230318d6d479f09589fd8 Mon Sep 17 00:00:00 2001 From: Jakob Unterwurzacher Date: Thu, 7 Sep 2023 17:17:16 +0200 Subject: [PATCH 070/661] arm64: dts: rockchip: set codec system-clock-fixed on px30-ringneck-haikou MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Having sgtl5000_clk defines as "fixed-clock" is not enough to prevent the dai subsystem from overwriting the frequency via sgtl5000_set_dai_sysclk. Setting system-clock-fixed does the job, and now a 1kHz sine wave comes out as actually 1kHz, no matter the sample rate of the source. Testcase: These should sound the same: speaker-test -r 48000 -t sine -f 1000 speaker-test -r 24000 -t sine -f 1000 Also remove the clock link here as having it in sgtl5000 and sgtl5000_codec causes duplicate clock unprepares with associated backtrace. Cc: stable@vger.kernel.org Fixes: c484cf93f61b ("arm64: dts: rockchip: add PX30-µQ7 (Ringneck) SoM with Haikou baseboard") Signed-off-by: Jakob Unterwurzacher Link: https://lore.kernel.org/r/20230907151725.198347-2-jakob.unterwurzacher@theobroma-systems.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts index 8792fae50257ad..de0a1f2af983be 100644 --- a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts @@ -72,8 +72,10 @@ simple-audio-card,bitclock-master = <&sgtl5000_codec>; sgtl5000_codec: simple-audio-card,codec { - clocks = <&sgtl5000_clk>; sound-dai = <&sgtl5000>; + // Prevent the dai subsystem from overwriting the clock + // frequency. We are using a fixed-frequency oscillator. + system-clock-fixed; }; simple-audio-card,cpu { From 2e9cbc4167da3134412ce47e4cdadbfdea30bbff Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Tue, 29 Aug 2023 22:37:21 +0200 Subject: [PATCH 071/661] ARM: dts: rockchip: Fix i2c0 register address for RK3128 The register address for i2c0 is missing a 0x to mark it as hex. Fixes: a0201bff6259 ("ARM: dts: rockchip: add rk3128 soc dtsi") Signed-off-by: Alex Bee Link: https://lore.kernel.org/r/20230829203721.281455-6-knaerzche@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rockchip/rk3128.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rockchip/rk3128.dtsi b/arch/arm/boot/dts/rockchip/rk3128.dtsi index b63bd4ad3143c5..2e345097b9bdad 100644 --- a/arch/arm/boot/dts/rockchip/rk3128.dtsi +++ b/arch/arm/boot/dts/rockchip/rk3128.dtsi @@ -426,7 +426,7 @@ i2c0: i2c@20072000 { compatible = "rockchip,rk3128-i2c", "rockchip,rk3288-i2c"; - reg = <20072000 0x1000>; + reg = <0x20072000 0x1000>; interrupts = ; clock-names = "i2c"; clocks = <&cru PCLK_I2C0>; From 7e3be9ea299927e6d65242c247eca0a21bc26a58 Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Tue, 29 Aug 2023 22:37:23 +0200 Subject: [PATCH 072/661] ARM: dts: rockchip: Add missing arm timer interrupt for RK3128 The Cortex-A7 timer has 4 interrupts. Add the missing one. Fixes: a0201bff6259 ("ARM: dts: rockchip: add rk3128 soc dtsi") Signed-off-by: Alex Bee Link: https://lore.kernel.org/r/20230829203721.281455-8-knaerzche@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rockchip/rk3128.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rockchip/rk3128.dtsi b/arch/arm/boot/dts/rockchip/rk3128.dtsi index 2e345097b9bdad..bf55d4575311a2 100644 --- a/arch/arm/boot/dts/rockchip/rk3128.dtsi +++ b/arch/arm/boot/dts/rockchip/rk3128.dtsi @@ -64,7 +64,8 @@ compatible = "arm,armv7-timer"; interrupts = , , - ; + , + ; arm,cpu-registers-not-fw-configured; clock-frequency = <24000000>; }; From b0b4e978784943c4ed8412dbb475178f8c51ba8e Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Tue, 29 Aug 2023 22:37:25 +0200 Subject: [PATCH 073/661] ARM: dts: rockchip: Add missing quirk for RK3128's dma engine Like most other Rockchip ARM SoCs, the PL330 needs the arm,pl330-periph-burst quirk in order to work as expected. Add it. Fixes: a0201bff6259 ("ARM: dts: rockchip: add rk3128 soc dtsi") Signed-off-by: Alex Bee Link: https://lore.kernel.org/r/20230829203721.281455-10-knaerzche@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rockchip/rk3128.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/rockchip/rk3128.dtsi b/arch/arm/boot/dts/rockchip/rk3128.dtsi index bf55d4575311a2..9125bf22e971c2 100644 --- a/arch/arm/boot/dts/rockchip/rk3128.dtsi +++ b/arch/arm/boot/dts/rockchip/rk3128.dtsi @@ -459,6 +459,7 @@ interrupts = , ; arm,pl330-broken-no-flushp; + arm,pl330-periph-burst; clocks = <&cru ACLK_DMAC>; clock-names = "apb_pclk"; #dma-cells = <1>; From 2c68d26f072b449bd45427241612cb3f8f997f82 Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Tue, 29 Aug 2023 22:37:27 +0200 Subject: [PATCH 074/661] ARM: dts: rockchip: Fix timer clocks for RK3128 Currently the Rockchip timer source clocks are set to xin24 for no obvious reason and the actual timer clocks (SCLK_TIMER*) will get disabled during boot process as they have no user. That will make the SoC stuck as no timer source exists. Fixes: a0201bff6259 ("ARM: dts: rockchip: add rk3128 soc dtsi") Signed-off-by: Alex Bee Link: https://lore.kernel.org/r/20230829203721.281455-12-knaerzche@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rockchip/rk3128.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/rockchip/rk3128.dtsi b/arch/arm/boot/dts/rockchip/rk3128.dtsi index 9125bf22e971c2..88a4b0d6d928d4 100644 --- a/arch/arm/boot/dts/rockchip/rk3128.dtsi +++ b/arch/arm/boot/dts/rockchip/rk3128.dtsi @@ -234,7 +234,7 @@ compatible = "rockchip,rk3128-timer", "rockchip,rk3288-timer"; reg = <0x20044000 0x20>; interrupts = ; - clocks = <&cru PCLK_TIMER>, <&xin24m>; + clocks = <&cru PCLK_TIMER>, <&cru SCLK_TIMER0>; clock-names = "pclk", "timer"; }; @@ -242,7 +242,7 @@ compatible = "rockchip,rk3128-timer", "rockchip,rk3288-timer"; reg = <0x20044020 0x20>; interrupts = ; - clocks = <&cru PCLK_TIMER>, <&xin24m>; + clocks = <&cru PCLK_TIMER>, <&cru SCLK_TIMER1>; clock-names = "pclk", "timer"; }; @@ -250,7 +250,7 @@ compatible = "rockchip,rk3128-timer", "rockchip,rk3288-timer"; reg = <0x20044040 0x20>; interrupts = ; - clocks = <&cru PCLK_TIMER>, <&xin24m>; + clocks = <&cru PCLK_TIMER>, <&cru SCLK_TIMER2>; clock-names = "pclk", "timer"; }; @@ -258,7 +258,7 @@ compatible = "rockchip,rk3128-timer", "rockchip,rk3288-timer"; reg = <0x20044060 0x20>; interrupts = ; - clocks = <&cru PCLK_TIMER>, <&xin24m>; + clocks = <&cru PCLK_TIMER>, <&cru SCLK_TIMER3>; clock-names = "pclk", "timer"; }; @@ -266,7 +266,7 @@ compatible = "rockchip,rk3128-timer", "rockchip,rk3288-timer"; reg = <0x20044080 0x20>; interrupts = ; - clocks = <&cru PCLK_TIMER>, <&xin24m>; + clocks = <&cru PCLK_TIMER>, <&cru SCLK_TIMER4>; clock-names = "pclk", "timer"; }; @@ -274,7 +274,7 @@ compatible = "rockchip,rk3128-timer", "rockchip,rk3288-timer"; reg = <0x200440a0 0x20>; interrupts = ; - clocks = <&cru PCLK_TIMER>, <&xin24m>; + clocks = <&cru PCLK_TIMER>, <&cru SCLK_TIMER5>; clock-names = "pclk", "timer"; }; From ec4405ed92036f5bb487b5c2f9a28f9e36a3e3d5 Mon Sep 17 00:00:00 2001 From: Gil Fine Date: Thu, 10 Aug 2023 23:18:25 +0300 Subject: [PATCH 075/661] thunderbolt: Call tb_switch_put() once DisplayPort bandwidth request is finished When handling DisplayPort bandwidth request tb_switch_find_by_route() is called and it returns a router structure with reference count increased. In order to avoid resource leak call tb_switch_put() when finished. Fixes: 6ce3563520be ("thunderbolt: Add support for DisplayPort bandwidth allocation mode") Cc: stable@vger.kernel.org Signed-off-by: Gil Fine Signed-off-by: Mika Westerberg --- drivers/thunderbolt/tb.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index dd0a1ef8cf1217..27bd6ca6f99e41 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -1907,14 +1907,14 @@ static void tb_handle_dp_bandwidth_request(struct work_struct *work) in = &sw->ports[ev->port]; if (!tb_port_is_dpin(in)) { tb_port_warn(in, "bandwidth request to non-DP IN adapter\n"); - goto unlock; + goto put_sw; } tb_port_dbg(in, "handling bandwidth allocation request\n"); if (!usb4_dp_port_bandwidth_mode_enabled(in)) { tb_port_warn(in, "bandwidth allocation mode not enabled\n"); - goto unlock; + goto put_sw; } ret = usb4_dp_port_requested_bandwidth(in); @@ -1923,7 +1923,7 @@ static void tb_handle_dp_bandwidth_request(struct work_struct *work) tb_port_dbg(in, "no bandwidth request active\n"); else tb_port_warn(in, "failed to read requested bandwidth\n"); - goto unlock; + goto put_sw; } requested_bw = ret; @@ -1932,7 +1932,7 @@ static void tb_handle_dp_bandwidth_request(struct work_struct *work) tunnel = tb_find_tunnel(tb, TB_TUNNEL_DP, in, NULL); if (!tunnel) { tb_port_warn(in, "failed to find tunnel\n"); - goto unlock; + goto put_sw; } out = tunnel->dst_port; @@ -1959,6 +1959,8 @@ static void tb_handle_dp_bandwidth_request(struct work_struct *work) tb_recalc_estimated_bandwidth(tb); } +put_sw: + tb_switch_put(sw); unlock: mutex_unlock(&tb->lock); From 3820c4fdc247b6f0a4162733bdb8ddf8f2e8a1e4 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 31 Jul 2023 12:37:58 +0200 Subject: [PATCH 076/661] nvme-rdma: do not try to stop unallocated queues Trying to stop a queue which hasn't been allocated will result in a warning due to calling mutex_lock() against an uninitialized mutex. DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 4 PID: 104150 at kernel/locking/mutex.c:579 Call trace: RIP: 0010:__mutex_lock+0x1173/0x14a0 nvme_rdma_stop_queue+0x1b/0xa0 [nvme_rdma] nvme_rdma_teardown_io_queues.part.0+0xb0/0x1d0 [nvme_rdma] nvme_rdma_delete_ctrl+0x50/0x100 [nvme_rdma] nvme_do_delete_ctrl+0x149/0x158 [nvme_core] Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Tested-by: Yi Zhang Signed-off-by: Keith Busch --- drivers/nvme/host/rdma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 337a624a537ce1..a7fea4cbacd753 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -638,6 +638,9 @@ static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) { + if (!test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) + return; + mutex_lock(&queue->queue_lock); if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags)) __nvme_rdma_stop_queue(queue); From 6313e096dbfaf1377ba8f5f8ccd720cc36c576c6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 4 Oct 2023 17:29:54 -0700 Subject: [PATCH 077/661] KVM: selftests: Zero-initialize entire test_result in memslot perf test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zero-initialize the entire test_result structure used by memslot_perf_test instead of zeroing only the fields used to guard the pr_info() calls. gcc 13.2.0 is a bit overzealous and incorrectly thinks that rbestslottime's slot_runtime may be used uninitialized. In file included from memslot_perf_test.c:25: memslot_perf_test.c: In function ‘main’: include/test_util.h:31:22: error: ‘rbestslottime.slot_runtime.tv_nsec’ may be used uninitialized [-Werror=maybe-uninitialized] 31 | #define pr_info(...) printf(__VA_ARGS__) | ^~~~~~~~~~~~~~~~~~~ memslot_perf_test.c:1127:17: note: in expansion of macro ‘pr_info’ 1127 | pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n", | ^~~~~~~ memslot_perf_test.c:1092:28: note: ‘rbestslottime.slot_runtime.tv_nsec’ was declared here 1092 | struct test_result rbestslottime; | ^~~~~~~~~~~~~ include/test_util.h:31:22: error: ‘rbestslottime.slot_runtime.tv_sec’ may be used uninitialized [-Werror=maybe-uninitialized] 31 | #define pr_info(...) printf(__VA_ARGS__) | ^~~~~~~~~~~~~~~~~~~ memslot_perf_test.c:1127:17: note: in expansion of macro ‘pr_info’ 1127 | pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n", | ^~~~~~~ memslot_perf_test.c:1092:28: note: ‘rbestslottime.slot_runtime.tv_sec’ was declared here 1092 | struct test_result rbestslottime; | ^~~~~~~~~~~~~ That can't actually happen, at least not without the "result" structure in test_loop() also being used uninitialized, which gcc doesn't complain about, as writes to rbestslottime are all-or-nothing, i.e. slottimens can't be non-zero without slot_runtime being written. if (!data->mem_size && (!rbestslottime->slottimens || result.slottimens < rbestslottime->slottimens)) *rbestslottime = result; Zero-initialize the structures to make gcc happy even though this is likely a compiler bug. The cost to do so is negligible, both in terms of code and runtime overhead. The only downside is that the compiler won't warn about legitimate usage of "uninitialized" data, e.g. the test could end up consuming zeros instead of useful data. However, given that the test is quite mature and unlikely to see substantial changes, the odds of introducing such bugs are relatively low, whereas being able to compile KVM selftests with -Werror detects issues on a regular basis. Reviewed-by: Maciej S. Szmigiero Reviewed-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20231005002954.2887098-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/memslot_perf_test.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 20eb2e730800e3..8698d1ab60d00f 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -1033,9 +1033,8 @@ static bool test_loop(const struct test_data *data, struct test_result *rbestruntime) { uint64_t maxslots; - struct test_result result; + struct test_result result = {}; - result.nloops = 0; if (!test_execute(targs->nslots, &maxslots, targs->seconds, data, &result.nloops, &result.slot_runtime, &result.guest_runtime)) { @@ -1089,7 +1088,7 @@ int main(int argc, char *argv[]) .seconds = 5, .runs = 1, }; - struct test_result rbestslottime; + struct test_result rbestslottime = {}; int tctr; if (!check_memory_sizes()) @@ -1098,11 +1097,10 @@ int main(int argc, char *argv[]) if (!parse_args(argc, argv, &targs)) return -1; - rbestslottime.slottimens = 0; for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) { const struct test_data *data = &tests[tctr]; unsigned int runctr; - struct test_result rbestruntime; + struct test_result rbestruntime = {}; if (tctr > targs.tfirst) pr_info("\n"); @@ -1110,7 +1108,6 @@ int main(int argc, char *argv[]) pr_info("Testing %s performance with %i runs, %d seconds each\n", data->name, targs.runs, targs.seconds); - rbestruntime.runtimens = 0; for (runctr = 0; runctr < targs.runs; runctr++) if (!test_loop(data, &targs, &rbestslottime, &rbestruntime)) From 7ec4cd3c1a12dc08c60d5e376c2c05aae23f1e41 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Thu, 5 Oct 2023 07:56:14 +0000 Subject: [PATCH 078/661] platform: mellanox: Fix a resource leak in an error handling path in probing flow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix missed resource deallocation in rollback flows. Currently if an error occurs after a successful mlxplat_i2c_main_init(), mlxplat_i2c_main_exit() call is missed in rollback flow. Thus, some resources are not de-allocated. Move mlxplat_pre_exit() call from mlxplat_remove() into mlxplat_i2c_main_exit(). Call mlxplat_i2c_main_exit() instead of calling mlxplat_pre_exit() in mlxplat_probe() error handling flow. Unregister 'priv->pdev_i2c' device in mlxplat_i2c_main_init() cleanup flow if this device was successfully registered. Fixes: 158cd8320776 ("platform: mellanox: Split logic in init and exit flow") Reported-by: Christophe JAILLET Closes: https://lore.kernel.org/lkml/70165032-796e-6f5c-6748-f514e3b9d08c@linux.intel.com/T/ Signed-off-by: Vadim Pasternak Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20231005075616.42777-2-vadimp@nvidia.com Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/mlx-platform.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c index 3d96dbf79a7254..a2ffe4157df10c 100644 --- a/drivers/platform/x86/mlx-platform.c +++ b/drivers/platform/x86/mlx-platform.c @@ -6514,6 +6514,7 @@ static int mlxplat_i2c_main_init(struct mlxplat_priv *priv) return 0; fail_mlxplat_i2c_mux_topology_init: + platform_device_unregister(priv->pdev_i2c); fail_platform_i2c_register: fail_mlxplat_mlxcpld_verify_bus_topology: return err; @@ -6521,6 +6522,7 @@ static int mlxplat_i2c_main_init(struct mlxplat_priv *priv) static void mlxplat_i2c_main_exit(struct mlxplat_priv *priv) { + mlxplat_pre_exit(priv); mlxplat_i2c_mux_topology_exit(priv); if (priv->pdev_i2c) platform_device_unregister(priv->pdev_i2c); @@ -6597,7 +6599,7 @@ static int mlxplat_probe(struct platform_device *pdev) fail_register_reboot_notifier: fail_regcache_sync: - mlxplat_pre_exit(priv); + mlxplat_i2c_main_exit(priv); fail_mlxplat_i2c_main_init: fail_regmap_write: fail_alloc: @@ -6614,7 +6616,6 @@ static int mlxplat_remove(struct platform_device *pdev) pm_power_off = NULL; if (mlxplat_reboot_nb) unregister_reboot_notifier(mlxplat_reboot_nb); - mlxplat_pre_exit(priv); mlxplat_i2c_main_exit(priv); mlxplat_post_exit(); return 0; From 61e21cf2d2c3cc5e60e8d0a62a77e250fccda62c Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Wed, 27 Sep 2023 17:44:01 +0800 Subject: [PATCH 079/661] mm/page_alloc: correct start page when guard page debug is enabled When guard page debug is enabled and set_page_guard returns success, we miss to forward page to point to start of next split range and we will do split unexpectedly in page range without target page. Move start page update before set_page_guard to fix this. As we split to wrong target page, then splited pages are not able to merge back to original order when target page is put back and splited pages except target page is not usable. To be specific: Consider target page is the third page in buddy page with order 2. | buddy-2 | Page | Target | Page | After break down to target page, we will only set first page to Guard because of bug. | Guard | Page | Target | Page | When we try put_page_back_buddy with target page, the buddy page of target if neither guard nor buddy, Then it's not able to construct original page with order 2 | Guard | Page | buddy-0 | Page | All pages except target page is not in free list and is not usable. Link: https://lkml.kernel.org/r/20230927094401.68205-1-shikemeng@huaweicloud.com Fixes: 06be6ff3d2ec ("mm,hwpoison: rework soft offline for free pages") Signed-off-by: Kemeng Shi Acked-by: Naoya Horiguchi Cc: Matthew Wilcox (Oracle) Cc: Oscar Salvador Cc: Signed-off-by: Andrew Morton --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 95546f3763021a..85741403948f55 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6475,6 +6475,7 @@ static void break_down_buddy_pages(struct zone *zone, struct page *page, next_page = page; current_buddy = page + size; } + page = next_page; if (set_page_guard(zone, current_buddy, high, migratetype)) continue; @@ -6482,7 +6483,6 @@ static void break_down_buddy_pages(struct zone *zone, struct page *page, if (current_buddy != target) { add_to_free_list(current_buddy, zone, high, migratetype); set_buddy_order(current_buddy, high); - page = next_page; } } } From 51f625377561e5b167da2db5aafb7ee268f691c5 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Thu, 28 Sep 2023 13:24:32 -0400 Subject: [PATCH 080/661] mm/mempolicy: fix set_mempolicy_home_node() previous VMA pointer The two users of mbind_range() are expecting that mbind_range() will update the pointer to the previous VMA, or return an error. However, set_mempolicy_home_node() does not call mbind_range() if there is no VMA policy. The fix is to update the pointer to the previous VMA prior to continuing iterating the VMAs when there is no policy. Users may experience a WARN_ON() during VMA policy updates when updating a range of VMAs on the home node. Link: https://lkml.kernel.org/r/20230928172432.2246534-1-Liam.Howlett@oracle.com Link: https://lore.kernel.org/linux-mm/CALcu4rbT+fMVNaO_F2izaCT+e7jzcAciFkOvk21HGJsmLcUuwQ@mail.gmail.com/ Fixes: f4e9e0e69468 ("mm/mempolicy: fix use-after-free of VMA iterator") Signed-off-by: Liam R. Howlett Reported-by: Yikebaer Aizezi Closes: https://lore.kernel.org/linux-mm/CALcu4rbT+fMVNaO_F2izaCT+e7jzcAciFkOvk21HGJsmLcUuwQ@mail.gmail.com/ Reviewed-by: Lorenzo Stoakes Cc: Signed-off-by: Andrew Morton --- mm/mempolicy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index f1b00d6ac7ee5c..29ebf1e7898cf0 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1543,8 +1543,10 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le * the home node for vmas we already updated before. */ old = vma_policy(vma); - if (!old) + if (!old) { + prev = vma; continue; + } if (old->mode != MPOL_BIND && old->mode != MPOL_PREFERRED_MANY) { err = -EOPNOTSUPP; break; From e0f81ab1e4f42ffece6440dc78f583eb352b9a71 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 29 Sep 2023 10:19:41 -0700 Subject: [PATCH 081/661] mm: fix vm_brk_flags() to not bail out while holding lock Calling vm_brk_flags() with flags set other than VM_EXEC will exit the function without releasing the mmap_write_lock. Just do the sanity check before the lock is acquired. This doesn't fix an actual issue since no caller sets a flag other than VM_EXEC. Link: https://lkml.kernel.org/r/20230929171937.work.697-kees@kernel.org Fixes: 2e7ce7d354f2 ("mm/mmap: change do_brk_flags() to expand existing VMA and add do_brk_munmap()") Signed-off-by: Sebastian Ott Signed-off-by: Kees Cook Reviewed-by: Liam R. Howlett Cc: Yu Zhao Signed-off-by: Andrew Morton --- mm/mmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index b56a7f0c9f8565..7ed286662839d1 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3143,13 +3143,13 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags) if (!len) return 0; - if (mmap_write_lock_killable(mm)) - return -EINTR; - /* Until we need other flags, refuse anything except VM_EXEC. */ if ((flags & (~VM_EXEC)) != 0) return -EINVAL; + if (mmap_write_lock_killable(mm)) + return -EINTR; + ret = check_brk_limits(addr, len); if (ret) goto limits_failed; From 1419430c8abb5a00590169068590dd54d86590ba Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 29 Sep 2023 14:30:39 -0400 Subject: [PATCH 082/661] mmap: fix vma_iterator in error path of vma_merge() During the error path, the vma iterator may not be correctly positioned or set to the correct range. Undo the vma_prev() call by resetting to the passed in address. Re-walking to the same range will fix the range to the area previously passed in. Users would notice increased cycles as vma_merge() would be called an extra time with vma == prev, and thus would fail to merge and return. Link: https://lore.kernel.org/linux-mm/CAG48ez12VN1JAOtTNMY+Y2YnsU45yL5giS-Qn=ejtiHpgJAbdQ@mail.gmail.com/ Link: https://lkml.kernel.org/r/20230929183041.2835469-2-Liam.Howlett@oracle.com Fixes: 18b098af2890 ("vma_merge: set vma iterator to correct position.") Signed-off-by: Liam R. Howlett Reported-by: Jann Horn Closes: https://lore.kernel.org/linux-mm/CAG48ez12VN1JAOtTNMY+Y2YnsU45yL5giS-Qn=ejtiHpgJAbdQ@mail.gmail.com/ Reviewed-by: Lorenzo Stoakes Acked-by: Vlastimil Babka Cc: Matthew Wilcox (Oracle) Cc: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton --- mm/mmap.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 7ed286662839d1..a0917ed260577d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -975,7 +975,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, /* Error in anon_vma clone. */ if (err) - return NULL; + goto anon_vma_fail; if (vma_start < vma->vm_start || vma_end > vma->vm_end) vma_expanded = true; @@ -988,7 +988,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, } if (vma_iter_prealloc(vmi, vma)) - return NULL; + goto prealloc_fail; init_multi_vma_prep(&vp, vma, adjust, remove, remove2); VM_WARN_ON(vp.anon_vma && adjust && adjust->anon_vma && @@ -1016,6 +1016,12 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, vma_complete(&vp, vmi, mm); khugepaged_enter_vma(res, vm_flags); return res; + +prealloc_fail: +anon_vma_fail: + vma_iter_set(vmi, addr); + vma_iter_load(vmi); + return NULL; } /* From 824135c46b00df7fb369ec7f1f8607427bbebeb0 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 29 Sep 2023 14:30:40 -0400 Subject: [PATCH 083/661] mmap: fix error paths with dup_anon_vma() When the calling function fails after the dup_anon_vma(), the duplication of the anon_vma is not being undone. Add the necessary unlink_anon_vma() call to the error paths that are missing them. This issue showed up during inspection of the error path in vma_merge() for an unrelated vma iterator issue. Users may experience increased memory usage, which may be problematic as the failure would likely be caused by a low memory situation. Link: https://lkml.kernel.org/r/20230929183041.2835469-3-Liam.Howlett@oracle.com Fixes: d4af56c5c7c6 ("mm: start tracking VMAs with maple tree") Signed-off-by: Liam R. Howlett Reviewed-by: Lorenzo Stoakes Acked-by: Vlastimil Babka Cc: Jann Horn Cc: Matthew Wilcox (Oracle) Cc: Suren Baghdasaryan Cc: Signed-off-by: Andrew Morton --- mm/mmap.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index a0917ed260577d..9e018d8dd7d693 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -583,11 +583,12 @@ static inline void vma_complete(struct vma_prepare *vp, * dup_anon_vma() - Helper function to duplicate anon_vma * @dst: The destination VMA * @src: The source VMA + * @dup: Pointer to the destination VMA when successful. * * Returns: 0 on success. */ static inline int dup_anon_vma(struct vm_area_struct *dst, - struct vm_area_struct *src) + struct vm_area_struct *src, struct vm_area_struct **dup) { /* * Easily overlooked: when mprotect shifts the boundary, make sure the @@ -595,9 +596,15 @@ static inline int dup_anon_vma(struct vm_area_struct *dst, * anon pages imported. */ if (src->anon_vma && !dst->anon_vma) { + int ret; + vma_assert_write_locked(dst); dst->anon_vma = src->anon_vma; - return anon_vma_clone(dst, src); + ret = anon_vma_clone(dst, src); + if (ret) + return ret; + + *dup = dst; } return 0; @@ -624,6 +631,7 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *next) { + struct vm_area_struct *anon_dup = NULL; bool remove_next = false; struct vma_prepare vp; @@ -633,7 +641,7 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, remove_next = true; vma_start_write(next); - ret = dup_anon_vma(vma, next); + ret = dup_anon_vma(vma, next, &anon_dup); if (ret) return ret; } @@ -661,6 +669,8 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, return 0; nomem: + if (anon_dup) + unlink_anon_vmas(anon_dup); return -ENOMEM; } @@ -860,6 +870,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, { struct vm_area_struct *curr, *next, *res; struct vm_area_struct *vma, *adjust, *remove, *remove2; + struct vm_area_struct *anon_dup = NULL; struct vma_prepare vp; pgoff_t vma_pgoff; int err = 0; @@ -927,18 +938,18 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, vma_start_write(next); remove = next; /* case 1 */ vma_end = next->vm_end; - err = dup_anon_vma(prev, next); + err = dup_anon_vma(prev, next, &anon_dup); if (curr) { /* case 6 */ vma_start_write(curr); remove = curr; remove2 = next; if (!next->anon_vma) - err = dup_anon_vma(prev, curr); + err = dup_anon_vma(prev, curr, &anon_dup); } } else if (merge_prev) { /* case 2 */ if (curr) { vma_start_write(curr); - err = dup_anon_vma(prev, curr); + err = dup_anon_vma(prev, curr, &anon_dup); if (end == curr->vm_end) { /* case 7 */ remove = curr; } else { /* case 5 */ @@ -954,7 +965,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, vma_end = addr; adjust = next; adj_start = -(prev->vm_end - addr); - err = dup_anon_vma(next, prev); + err = dup_anon_vma(next, prev, &anon_dup); } else { /* * Note that cases 3 and 8 are the ONLY ones where prev @@ -968,7 +979,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, vma_pgoff = curr->vm_pgoff; vma_start_write(curr); remove = curr; - err = dup_anon_vma(next, curr); + err = dup_anon_vma(next, curr, &anon_dup); } } } @@ -1018,6 +1029,9 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, return res; prealloc_fail: + if (anon_dup) + unlink_anon_vmas(anon_dup); + anon_vma_fail: vma_iter_set(vmi, addr); vma_iter_load(vmi); From 117b1bb0cbc7f5feab4fd251737869958987808c Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Thu, 28 Sep 2023 17:18:45 +0200 Subject: [PATCH 084/661] riscv: handle VM_FAULT_[HWPOISON|HWPOISON_LARGE] faults instead of panicking Patch series "Fix set_huge_pte_at()". A recent report [1] from Ryan for arm64 revealed that we do not handle swap entries when setting a hugepage backed by a NAPOT region (the contpte riscv equivalent). As explained in [1], the issue was discovered by a new test in kselftest which uses poison entries, but the symptoms are different from arm64 though: - the riscv kernel bugs because we do not handle VM_FAULT_HWPOISON*, this is fixed by patch 1, - after that, the test passes because the first pte_napot() fails (the poison entry does not have the N bit set), and then we only set the first page table entry covering the NAPOT hugepage, which is enough for hugetlb_fault() to correctly raise a VM_FAULT_HWPOISON wherever we write in this mapping since only this first page table entry is checked (see https://elixir.bootlin.com/linux/v6.6-rc3/source/mm/hugetlb.c#L6071). But this seems fragile so patch 2 sets all page table entries of a NAPOT mapping. [1]: https://lore.kernel.org/linux-arm-kernel/20230922115804.2043771-1-ryan.roberts@arm.com/ This patch (of 2): We used to panic when such faults were encountered but we should handle those faults gracefully for userspace by sending a SIGBUS to the process, like most architectures do. Link: https://lkml.kernel.org/r/20230928151846.8229-1-alexghiti@rivosinc.com Link: https://lkml.kernel.org/r/20230928151846.8229-2-alexghiti@rivosinc.com Signed-off-by: Alexandre Ghiti Acked-by: Palmer Dabbelt Cc: Albert Ou Cc: Andrew Jones Cc: Conor Dooley Cc: Paul Walmsley Cc: Qinglin Pan Signed-off-by: Andrew Morton --- arch/riscv/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 6115d751497200..90d4ba36d1d062 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -72,7 +72,7 @@ static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_f } pagefault_out_of_memory(); return; - } else if (fault & VM_FAULT_SIGBUS) { + } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) { /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) { no_context(regs, addr); From 1de195dd0e05d9cba43dec16f83d4ee32af94dd2 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Thu, 28 Sep 2023 17:18:46 +0200 Subject: [PATCH 085/661] riscv: fix set_huge_pte_at() for NAPOT mappings when a swap entry is set We used to determine the number of page table entries to set for a NAPOT hugepage by using the pte value which actually fails when the pte to set is a swap entry. So take advantage of a recent fix for arm64 reported in [1] which introduces the size of the mapping as an argument of set_huge_pte_at(): we can then use this size to compute the number of page table entries to set for a NAPOT region. Link: https://lkml.kernel.org/r/20230928151846.8229-3-alexghiti@rivosinc.com Fixes: 82a1a1f3bfb6 ("riscv: mm: support Svnapot in hugetlb page") Signed-off-by: Alexandre Ghiti Reported-by: Ryan Roberts Closes: https://lore.kernel.org/linux-arm-kernel/20230922115804.2043771-1-ryan.roberts@arm.com/ [1] Reviewed-by: Andrew Jones Cc: Albert Ou Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Qinglin Pan Cc: Conor Dooley Signed-off-by: Andrew Morton --- arch/riscv/mm/hugetlbpage.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index e4a2ace92dbeba..b52f0210481fac 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -183,15 +183,22 @@ void set_huge_pte_at(struct mm_struct *mm, pte_t pte, unsigned long sz) { + unsigned long hugepage_shift; int i, pte_num; - if (!pte_napot(pte)) { - set_pte_at(mm, addr, ptep, pte); - return; - } + if (sz >= PGDIR_SIZE) + hugepage_shift = PGDIR_SHIFT; + else if (sz >= P4D_SIZE) + hugepage_shift = P4D_SHIFT; + else if (sz >= PUD_SIZE) + hugepage_shift = PUD_SHIFT; + else if (sz >= PMD_SIZE) + hugepage_shift = PMD_SHIFT; + else + hugepage_shift = PAGE_SHIFT; - pte_num = napot_pte_num(napot_cont_order(pte)); - for (i = 0; i < pte_num; i++, ptep++, addr += PAGE_SIZE) + pte_num = sz >> hugepage_shift; + for (i = 0; i < pte_num; i++, ptep++, addr += (1 << hugepage_shift)) set_pte_at(mm, addr, ptep, pte); } From 229e2253766c7cdfe024f1fe280020cc4711087c Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Tue, 3 Oct 2023 10:48:56 -0400 Subject: [PATCH 086/661] mm/migrate: fix do_pages_move for compat pointers do_pages_move does not handle compat pointers for the page list. correctly. Add in_compat_syscall check and appropriate get_user fetch when iterating the page list. It makes the syscall in compat mode (32-bit userspace, 64-bit kernel) work the same way as the native 32-bit syscall again, restoring the behavior before my broken commit 5b1b561ba73c ("mm: simplify compat_sys_move_pages"). More specifically, my patch moved the parsing of the 'pages' array from the main entry point into do_pages_stat(), which left the syscall working correctly for the 'stat' operation (nodes = NULL), while the 'move' operation (nodes != NULL) is now missing the conversion and interprets 'pages' as an array of 64-bit pointers instead of the intended 32-bit userspace pointers. It is possible that nobody noticed this bug because the few applications that actually call move_pages are unlikely to run in compat mode because of their large memory requirements, but this clearly fixes a user-visible regression and should have been caught by ltp. Link: https://lkml.kernel.org/r/20231003144857.752952-1-gregory.price@memverge.com Fixes: 5b1b561ba73c ("mm: simplify compat_sys_move_pages") Signed-off-by: Gregory Price Reported-by: Arnd Bergmann Co-developed-by: Arnd Bergmann Cc: Jonathan Cameron Cc: Signed-off-by: Andrew Morton --- mm/migrate.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 2053b54556ca5b..06086dc9da288f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2162,6 +2162,7 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, const int __user *nodes, int __user *status, int flags) { + compat_uptr_t __user *compat_pages = (void __user *)pages; int current_node = NUMA_NO_NODE; LIST_HEAD(pagelist); int start, i; @@ -2174,8 +2175,17 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, int node; err = -EFAULT; - if (get_user(p, pages + i)) - goto out_flush; + if (in_compat_syscall()) { + compat_uptr_t cp; + + if (get_user(cp, compat_pages + i)) + goto out_flush; + + p = compat_ptr(cp); + } else { + if (get_user(p, pages + i)) + goto out_flush; + } if (get_user(node, nodes + i)) goto out_flush; From 46e61750cfafea17f92add0a89483db7db3b1bda Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 2 Oct 2023 10:04:59 +0300 Subject: [PATCH 087/661] ARM: omap2: fix a debug printk The %pR format string takes a pointer to struct resource, but this is passing a pointer to a pointer which it will print wrong information. Fixes: c63f5b454885 ("ARM: omap2: Use of_range_to_resource() for "ranges" parsing") Signed-off-by: Dan Carpenter Message-ID: Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 1e17b5f775889b..ba71928c0fcb7b 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -2209,7 +2209,7 @@ int omap_hwmod_parse_module_range(struct omap_hwmod *oh, return err; pr_debug("omap_hwmod: %s %pOFn at %pR\n", - oh->name, np, &res); + oh->name, np, res); if (oh && oh->mpu_rt_idx) { omap_hwmod_fix_mpu_rt_idx(oh, np, res); From 7eeca8ccd1066c68d6002dbbe26433f8c17c53eb Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 6 Oct 2023 17:16:03 -0700 Subject: [PATCH 088/661] ARM: OMAP: timer32K: fix all kernel-doc warnings Fix kernel-doc warnings reported by the kernel test robot: timer32k.c:186: warning: cannot understand function prototype: 'struct timespec64 persistent_ts; ' timer32k.c:191: warning: Function parameter or member 'ts' not described in 'omap_read_persistent_clock64' timer32k.c:216: warning: Function parameter or member 'vbase' not described in 'omap_init_clocksource_32k' timer32k.c:216: warning: Excess function parameter 'pbase' description in 'omap_init_clocksource_32k' timer32k.c:216: warning: Excess function parameter 'size' description in 'omap_init_clocksource_32k' timer32k.c:216: warning: No description found for return value of 'omap_init_clocksource_32k' Fixes: a451570c008b ("ARM: OMAP: 32k counter: Provide y2038-safe omap_read_persistent_clock() replacement") Fixes: 1fe97c8f6a1d ("ARM: OMAP: Make OMAP clocksource source selection using kernel param") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Closes: https://lore.kernel.org/all/202310070106.8QSyJOm3-lkp@intel.com/ Cc: Arnd Bergmann Cc: Vaibhav Hiremath Cc: Felipe Balbi Cc: Tony Lindgren Cc: Xunlei Pang Cc: John Stultz Cc: Ingo Molnar Cc: Aaro Koskinen Cc: Janusz Krzysztofik Cc: linux-omap@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Message-ID: <20231007001603.24972-1-rdunlap@infradead.org> Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/timer32k.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/mach-omap1/timer32k.c b/arch/arm/mach-omap1/timer32k.c index 410d17d1d4431e..f618a6df29382b 100644 --- a/arch/arm/mach-omap1/timer32k.c +++ b/arch/arm/mach-omap1/timer32k.c @@ -176,17 +176,18 @@ static u64 notrace omap_32k_read_sched_clock(void) return sync32k_cnt_reg ? readl_relaxed(sync32k_cnt_reg) : 0; } +static struct timespec64 persistent_ts; +static cycles_t cycles; +static unsigned int persistent_mult, persistent_shift; + /** * omap_read_persistent_clock64 - Return time from a persistent clock. + * @ts: &struct timespec64 for the returned time * * Reads the time from a source which isn't disabled during PM, the * 32k sync timer. Convert the cycles elapsed since last read into * nsecs and adds to a monotonically increasing timespec64. */ -static struct timespec64 persistent_ts; -static cycles_t cycles; -static unsigned int persistent_mult, persistent_shift; - static void omap_read_persistent_clock64(struct timespec64 *ts) { unsigned long long nsecs; @@ -206,10 +207,9 @@ static void omap_read_persistent_clock64(struct timespec64 *ts) /** * omap_init_clocksource_32k - setup and register counter 32k as a * kernel clocksource - * @pbase: base addr of counter_32k module - * @size: size of counter_32k to map + * @vbase: base addr of counter_32k module * - * Returns 0 upon success or negative error code upon failure. + * Returns: %0 upon success or negative error code upon failure. * */ static int __init omap_init_clocksource_32k(void __iomem *vbase) From 76aca10ccb7c23a7b7a0d56e0bfde2c8cdddfe24 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Oct 2023 17:57:09 +0200 Subject: [PATCH 089/661] ASoC: soc-dapm: Add helper for comparing widget name Some drivers use one event callback for multiple widgets but still need to perform a bit different actions based on actual widget. This is done by comparing widget name, however drivers tend to miss possible name prefix. Add a helper to solve common mistakes. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231003155710.821315-2-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 1 + sound/soc/soc-component.c | 1 + sound/soc/soc-dapm.c | 12 ++++++++++++ 3 files changed, 14 insertions(+) diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index d2faec9a323ea3..433543eb82b916 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -469,6 +469,7 @@ void snd_soc_dapm_connect_dai_link_widgets(struct snd_soc_card *card); int snd_soc_dapm_update_dai(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai); +int snd_soc_dapm_widget_name_cmp(struct snd_soc_dapm_widget *widget, const char *s); /* dapm path setup */ int snd_soc_dapm_new_widgets(struct snd_soc_card *card); diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c index ba7c0ae82e0079..566033f7dd2ea7 100644 --- a/sound/soc/soc-component.c +++ b/sound/soc/soc-component.c @@ -242,6 +242,7 @@ int snd_soc_component_notify_control(struct snd_soc_component *component, char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; struct snd_kcontrol *kctl; + /* When updating, change also snd_soc_dapm_widget_name_cmp() */ if (component->name_prefix) snprintf(name, ARRAY_SIZE(name), "%s %s", component->name_prefix, ctl); else diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index f07e836783737b..312e5557983156 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -2728,6 +2728,18 @@ int snd_soc_dapm_update_dai(struct snd_pcm_substream *substream, } EXPORT_SYMBOL_GPL(snd_soc_dapm_update_dai); +int snd_soc_dapm_widget_name_cmp(struct snd_soc_dapm_widget *widget, const char *s) +{ + struct snd_soc_component *component = snd_soc_dapm_to_component(widget->dapm); + const char *wname = widget->name; + + if (component->name_prefix) + wname += strlen(component->name_prefix) + 1; /* plus space */ + + return strcmp(wname, s); +} +EXPORT_SYMBOL_GPL(snd_soc_dapm_widget_name_cmp); + /* * dapm_update_widget_flags() - Re-compute widget sink and source flags * @w: The widget for which to update the flags From c29e5263d32a6d0ec094d425ae7fef3fa8d4da1c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 3 Oct 2023 17:57:10 +0200 Subject: [PATCH 090/661] ASoC: codecs: wsa-macro: handle component name prefix When comparing widget names in wsa_macro_spk_boost_event(), consider also the component's name prefix. Otherwise the WSA codec won't have proper mixer setup resulting in no sound playback through speakers. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231003155710.821315-3-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-wsa-macro.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c index ec6859ec0d38ec..fff4a8b862a732 100644 --- a/sound/soc/codecs/lpass-wsa-macro.c +++ b/sound/soc/codecs/lpass-wsa-macro.c @@ -1675,12 +1675,12 @@ static int wsa_macro_spk_boost_event(struct snd_soc_dapm_widget *w, u16 boost_path_ctl, boost_path_cfg1; u16 reg, reg_mix; - if (!strcmp(w->name, "WSA_RX INT0 CHAIN")) { + if (!snd_soc_dapm_widget_name_cmp(w, "WSA_RX INT0 CHAIN")) { boost_path_ctl = CDC_WSA_BOOST0_BOOST_PATH_CTL; boost_path_cfg1 = CDC_WSA_RX0_RX_PATH_CFG1; reg = CDC_WSA_RX0_RX_PATH_CTL; reg_mix = CDC_WSA_RX0_RX_PATH_MIX_CTL; - } else if (!strcmp(w->name, "WSA_RX INT1 CHAIN")) { + } else if (!snd_soc_dapm_widget_name_cmp(w, "WSA_RX INT1 CHAIN")) { boost_path_ctl = CDC_WSA_BOOST1_BOOST_PATH_CTL; boost_path_cfg1 = CDC_WSA_RX1_RX_PATH_CFG1; reg = CDC_WSA_RX1_RX_PATH_CTL; From bfbc79de60c53e5fed505390440b87ef59ee268c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:52 +0200 Subject: [PATCH 091/661] ASoC: codecs: wcd938x: drop bogus bind error handling Drop the bogus error handling for a soundwire device backcast during bind() that cannot fail. Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-2-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index a3c68066137775..cf1eaf678fc26d 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3448,10 +3448,6 @@ static int wcd938x_bind(struct device *dev) wcd938x->sdw_priv[AIF1_CAP] = dev_get_drvdata(wcd938x->txdev); wcd938x->sdw_priv[AIF1_CAP]->wcd938x = wcd938x; wcd938x->tx_sdw_dev = dev_to_sdw_dev(wcd938x->txdev); - if (!wcd938x->tx_sdw_dev) { - dev_err(dev, "could not get txslave with matching of dev\n"); - return -EINVAL; - } /* As TX is main CSR reg interface, which should not be suspended first. * expicilty add the dependency link */ From fa2f8a991ba4aa733ac1c3b1be0c86148aa4c52c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:53 +0200 Subject: [PATCH 092/661] ASoC: codecs: wcd938x: fix unbind tear down order Make sure to deregister the component before tearing down the resources it depends on during unbind(). Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-3-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index cf1eaf678fc26d..c617fc3ce4890e 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3504,10 +3504,10 @@ static void wcd938x_unbind(struct device *dev) { struct wcd938x_priv *wcd938x = dev_get_drvdata(dev); + snd_soc_unregister_component(dev); device_link_remove(dev, wcd938x->txdev); device_link_remove(dev, wcd938x->rxdev); device_link_remove(wcd938x->rxdev, wcd938x->txdev); - snd_soc_unregister_component(dev); component_unbind_all(dev, wcd938x); } From da29b94ed3547cee9d510d02eca4009f2de476cf Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:54 +0200 Subject: [PATCH 093/661] ASoC: codecs: wcd938x: fix resource leaks on bind errors Add the missing code to release resources on bind errors, including the references taken by wcd938x_sdw_device_get() which also need to be dropped on unbind(). Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-4-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 44 +++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index c617fc3ce4890e..7e0b07eeed7750 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3435,7 +3435,8 @@ static int wcd938x_bind(struct device *dev) wcd938x->rxdev = wcd938x_sdw_device_get(wcd938x->rxnode); if (!wcd938x->rxdev) { dev_err(dev, "could not find slave with matching of node\n"); - return -EINVAL; + ret = -EINVAL; + goto err_unbind; } wcd938x->sdw_priv[AIF1_PB] = dev_get_drvdata(wcd938x->rxdev); wcd938x->sdw_priv[AIF1_PB]->wcd938x = wcd938x; @@ -3443,7 +3444,8 @@ static int wcd938x_bind(struct device *dev) wcd938x->txdev = wcd938x_sdw_device_get(wcd938x->txnode); if (!wcd938x->txdev) { dev_err(dev, "could not find txslave with matching of node\n"); - return -EINVAL; + ret = -EINVAL; + goto err_put_rxdev; } wcd938x->sdw_priv[AIF1_CAP] = dev_get_drvdata(wcd938x->txdev); wcd938x->sdw_priv[AIF1_CAP]->wcd938x = wcd938x; @@ -3454,31 +3456,35 @@ static int wcd938x_bind(struct device *dev) if (!device_link_add(wcd938x->rxdev, wcd938x->txdev, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME)) { dev_err(dev, "could not devlink tx and rx\n"); - return -EINVAL; + ret = -EINVAL; + goto err_put_txdev; } if (!device_link_add(dev, wcd938x->txdev, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME)) { dev_err(dev, "could not devlink wcd and tx\n"); - return -EINVAL; + ret = -EINVAL; + goto err_remove_rxtx_link; } if (!device_link_add(dev, wcd938x->rxdev, DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME)) { dev_err(dev, "could not devlink wcd and rx\n"); - return -EINVAL; + ret = -EINVAL; + goto err_remove_tx_link; } wcd938x->regmap = dev_get_regmap(&wcd938x->tx_sdw_dev->dev, NULL); if (!wcd938x->regmap) { dev_err(dev, "could not get TX device regmap\n"); - return -EINVAL; + ret = -EINVAL; + goto err_remove_rx_link; } ret = wcd938x_irq_init(wcd938x, dev); if (ret) { dev_err(dev, "%s: IRQ init failed: %d\n", __func__, ret); - return ret; + goto err_remove_rx_link; } wcd938x->sdw_priv[AIF1_PB]->slave_irq = wcd938x->virq; @@ -3487,17 +3493,33 @@ static int wcd938x_bind(struct device *dev) ret = wcd938x_set_micbias_data(wcd938x); if (ret < 0) { dev_err(dev, "%s: bad micbias pdata\n", __func__); - return ret; + goto err_remove_rx_link; } ret = snd_soc_register_component(dev, &soc_codec_dev_wcd938x, wcd938x_dais, ARRAY_SIZE(wcd938x_dais)); - if (ret) + if (ret) { dev_err(dev, "%s: Codec registration failed\n", __func__); + goto err_remove_rx_link; + } - return ret; + return 0; +err_remove_rx_link: + device_link_remove(dev, wcd938x->rxdev); +err_remove_tx_link: + device_link_remove(dev, wcd938x->txdev); +err_remove_rxtx_link: + device_link_remove(wcd938x->rxdev, wcd938x->txdev); +err_put_txdev: + put_device(wcd938x->txdev); +err_put_rxdev: + put_device(wcd938x->rxdev); +err_unbind: + component_unbind_all(dev, wcd938x); + + return ret; } static void wcd938x_unbind(struct device *dev) @@ -3508,6 +3530,8 @@ static void wcd938x_unbind(struct device *dev) device_link_remove(dev, wcd938x->txdev); device_link_remove(dev, wcd938x->rxdev); device_link_remove(wcd938x->rxdev, wcd938x->txdev); + put_device(wcd938x->txdev); + put_device(wcd938x->rxdev); component_unbind_all(dev, wcd938x); } From 69a026a2357ee69983690d07976de44ef26ee38a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:55 +0200 Subject: [PATCH 094/661] ASoC: codecs: wcd938x: fix regulator leaks on probe errors Make sure to disable and free the regulators on probe errors and on driver unbind. Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-5-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index 7e0b07eeed7750..679c627f7eaafc 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3325,8 +3325,10 @@ static int wcd938x_populate_dt_data(struct wcd938x_priv *wcd938x, struct device return dev_err_probe(dev, ret, "Failed to get supplies\n"); ret = regulator_bulk_enable(WCD938X_MAX_SUPPLY, wcd938x->supplies); - if (ret) + if (ret) { + regulator_bulk_free(WCD938X_MAX_SUPPLY, wcd938x->supplies); return dev_err_probe(dev, ret, "Failed to enable supplies\n"); + } wcd938x_dt_parse_micbias_info(dev, wcd938x); @@ -3592,13 +3594,13 @@ static int wcd938x_probe(struct platform_device *pdev) ret = wcd938x_add_slave_components(wcd938x, dev, &match); if (ret) - return ret; + goto err_disable_regulators; wcd938x_reset(wcd938x); ret = component_master_add_with_match(dev, &wcd938x_comp_ops, match); if (ret) - return ret; + goto err_disable_regulators; pm_runtime_set_autosuspend_delay(dev, 1000); pm_runtime_use_autosuspend(dev); @@ -3608,11 +3610,21 @@ static int wcd938x_probe(struct platform_device *pdev) pm_runtime_idle(dev); return 0; + +err_disable_regulators: + regulator_bulk_disable(WCD938X_MAX_SUPPLY, wcd938x->supplies); + regulator_bulk_free(WCD938X_MAX_SUPPLY, wcd938x->supplies); + + return ret; } static void wcd938x_remove(struct platform_device *pdev) { + struct wcd938x_priv *wcd938x = dev_get_drvdata(&pdev->dev); + component_master_del(&pdev->dev, &wcd938x_comp_ops); + regulator_bulk_disable(WCD938X_MAX_SUPPLY, wcd938x->supplies); + regulator_bulk_free(WCD938X_MAX_SUPPLY, wcd938x->supplies); } #if defined(CONFIG_OF) From 3ebebb2c1eca92a15107b2d7aeff34196fd9e217 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:56 +0200 Subject: [PATCH 095/661] ASoC: codecs: wcd938x: fix runtime PM imbalance on remove Make sure to balance the runtime PM operations, including the disable count, on driver unbind. Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-6-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index 679c627f7eaafc..d27b919c63b419 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3620,9 +3620,15 @@ static int wcd938x_probe(struct platform_device *pdev) static void wcd938x_remove(struct platform_device *pdev) { - struct wcd938x_priv *wcd938x = dev_get_drvdata(&pdev->dev); + struct device *dev = &pdev->dev; + struct wcd938x_priv *wcd938x = dev_get_drvdata(dev); + + component_master_del(dev, &wcd938x_comp_ops); + + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + pm_runtime_dont_use_autosuspend(dev); - component_master_del(&pdev->dev, &wcd938x_comp_ops); regulator_bulk_disable(WCD938X_MAX_SUPPLY, wcd938x->supplies); regulator_bulk_free(WCD938X_MAX_SUPPLY, wcd938x->supplies); } From f0dfdcbe706462495d47982eecd13a61aabd644d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:57 +0200 Subject: [PATCH 096/661] ASoC: codecs: wcd938x-sdw: fix use after free on driver unbind Make sure to deregister the component when the driver is being unbound and before the underlying device-managed resources are freed. Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-7-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x-sdw.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/codecs/wcd938x-sdw.c b/sound/soc/codecs/wcd938x-sdw.c index 6951120057e55d..1baea04480e27d 100644 --- a/sound/soc/codecs/wcd938x-sdw.c +++ b/sound/soc/codecs/wcd938x-sdw.c @@ -1281,6 +1281,15 @@ static int wcd9380_probe(struct sdw_slave *pdev, return component_add(dev, &wcd938x_sdw_component_ops); } +static int wcd9380_remove(struct sdw_slave *pdev) +{ + struct device *dev = &pdev->dev; + + component_del(dev, &wcd938x_sdw_component_ops); + + return 0; +} + static const struct sdw_device_id wcd9380_slave_id[] = { SDW_SLAVE_ENTRY(0x0217, 0x10d, 0), {}, @@ -1320,6 +1329,7 @@ static const struct dev_pm_ops wcd938x_sdw_pm_ops = { static struct sdw_driver wcd9380_codec_driver = { .probe = wcd9380_probe, + .remove = wcd9380_remove, .ops = &wcd9380_slave_ops, .id_table = wcd9380_slave_id, .driver = { From c5c0383082eace13da2ffceeea154db2780165e7 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 3 Oct 2023 17:55:58 +0200 Subject: [PATCH 097/661] ASoC: codecs: wcd938x-sdw: fix runtime PM imbalance on probe errors Make sure to balance the runtime PM operations, including the disable count, on probe errors and on driver unbind. Fixes: 16572522aece ("ASoC: codecs: wcd938x-sdw: add SoundWire driver") Cc: stable@vger.kernel.org # 5.14 Cc: Srinivas Kandagatla Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231003155558.27079-8-johan+linaro@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/wcd938x-sdw.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/wcd938x-sdw.c b/sound/soc/codecs/wcd938x-sdw.c index 1baea04480e27d..a1f04010da95f6 100644 --- a/sound/soc/codecs/wcd938x-sdw.c +++ b/sound/soc/codecs/wcd938x-sdw.c @@ -1278,7 +1278,18 @@ static int wcd9380_probe(struct sdw_slave *pdev, pm_runtime_set_active(dev); pm_runtime_enable(dev); - return component_add(dev, &wcd938x_sdw_component_ops); + ret = component_add(dev, &wcd938x_sdw_component_ops); + if (ret) + goto err_disable_rpm; + + return 0; + +err_disable_rpm: + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + pm_runtime_dont_use_autosuspend(dev); + + return ret; } static int wcd9380_remove(struct sdw_slave *pdev) @@ -1287,6 +1298,10 @@ static int wcd9380_remove(struct sdw_slave *pdev) component_del(dev, &wcd938x_sdw_component_ops); + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + pm_runtime_dont_use_autosuspend(dev); + return 0; } From af5fd122d7bd739a2b66405f6e8ab92557279325 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 6 Oct 2023 17:44:05 +0100 Subject: [PATCH 098/661] ASoC: cs35l56: Fix illegal use of init_completion() Fix cs35l56_patch() to call reinit_completion() to reinitialize the completion object. It was incorrectly using init_completion(). Signed-off-by: Richard Fitzgerald Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Link: https://lore.kernel.org/r/20231006164405.253796-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index f2e7c6d0be46c8..9c2d9cbc63d375 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -706,7 +706,7 @@ static void cs35l56_patch(struct cs35l56_private *cs35l56) mutex_lock(&cs35l56->base.irq_lock); - init_completion(&cs35l56->init_completion); + reinit_completion(&cs35l56->init_completion); cs35l56->soft_resetting = true; cs35l56_system_reset(&cs35l56->base, !!cs35l56->sdw_peripheral); From aa6464edbd51af4a2f8db43df866a7642b244b5f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 5 Oct 2023 17:00:24 +0300 Subject: [PATCH 099/661] ASoC: pxa: fix a memory leak in probe() Free the "priv" pointer before returning the error code. Fixes: 90eb6b59d311 ("ASoC: pxa-ssp: add support for an external clock in devicetree") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/84ac2313-1420-471a-b2cb-3269a2e12a7c@moroto.mountain Signed-off-by: Mark Brown --- sound/soc/pxa/pxa-ssp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c index b70034c07eeea5..b8a3cb8b75978e 100644 --- a/sound/soc/pxa/pxa-ssp.c +++ b/sound/soc/pxa/pxa-ssp.c @@ -773,7 +773,7 @@ static int pxa_ssp_probe(struct snd_soc_dai *dai) if (IS_ERR(priv->extclk)) { ret = PTR_ERR(priv->extclk); if (ret == -EPROBE_DEFER) - return ret; + goto err_priv; priv->extclk = NULL; } From c6df843348d6b71ea986266c12831cb60c2cf325 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 6 Oct 2023 10:21:04 +0200 Subject: [PATCH 100/661] regmap: fix NULL deref on lookup Not all regmaps have a name so make sure to check for that to avoid dereferencing a NULL pointer when dev_get_regmap() is used to lookup a named regmap. Fixes: e84861fec32d ("regmap: dev_get_regmap_match(): fix string comparison") Cc: stable@vger.kernel.org # 5.8 Cc: Marc Kleine-Budde Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20231006082104.16707-1-johan+linaro@kernel.org Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 884cb51c8f6729..234a84ecde8b1b 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1478,7 +1478,7 @@ static int dev_get_regmap_match(struct device *dev, void *res, void *data) /* If the user didn't specify a name match any */ if (data) - return !strcmp((*r)->name, data); + return (*r)->name && !strcmp((*r)->name, data); else return 1; } From a37cd2a59d0cb270b1bba568fd3a3b8668b9d3ba Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 5 Oct 2023 11:06:36 +0200 Subject: [PATCH 101/661] x86/sev: Disable MMIO emulation from user mode A virt scenario can be constructed where MMIO memory can be user memory. When that happens, a race condition opens between when the hardware raises the #VC and when the #VC handler gets to emulate the instruction. If the MOVS is replaced with a MOVS accessing kernel memory in that small race window, then write to kernel memory happens as the access checks are not done at emulation time. Disable MMIO emulation in user mode temporarily until a sensible use case appears and justifies properly handling the race window. Fixes: 0118b604c2c9 ("x86/sev-es: Handle MMIO String Instructions") Reported-by: Tom Dohrmann Signed-off-by: Borislav Petkov (AMD) Tested-by: Tom Dohrmann Cc: --- arch/x86/kernel/sev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 2787826d9f6077..4ee14e647ae62b 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -1509,6 +1509,9 @@ static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) return ES_DECODE_FAILED; } + if (user_mode(ctxt->regs)) + return ES_UNSUPPORTED; + switch (mmio) { case INSN_MMIO_WRITE: memcpy(ghcb->shared_buffer, reg_data, bytes); From b9cb9c45583b911e0db71d09caa6b56469eb2bdf Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 21 Jun 2023 17:42:42 +0200 Subject: [PATCH 102/661] x86/sev: Check IOBM for IOIO exceptions from user-space Check the IO permission bitmap (if present) before emulating IOIO #VC exceptions for user-space. These permissions are checked by hardware already before the #VC is raised, but due to the VC-handler decoding race it needs to be checked again in software. Fixes: 25189d08e516 ("x86/sev-es: Add support for handling IOIO exceptions") Reported-by: Tom Dohrmann Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov (AMD) Tested-by: Tom Dohrmann Cc: --- arch/x86/boot/compressed/sev.c | 5 +++++ arch/x86/kernel/sev-shared.c | 22 +++++++++++++++------- arch/x86/kernel/sev.c | 27 +++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 7 deletions(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index dc8c876fbd8f8f..afd91041ec3ed0 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -103,6 +103,11 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, return ES_OK; } +static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size) +{ + return ES_OK; +} + #undef __init #define __init diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 2eabccde94fb31..bcd77c48be0a5a 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -656,6 +656,9 @@ static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt, static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) { struct insn *insn = &ctxt->insn; + size_t size; + u64 port; + *exitinfo = 0; switch (insn->opcode.bytes[0]) { @@ -664,7 +667,7 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) case 0x6d: *exitinfo |= IOIO_TYPE_INS; *exitinfo |= IOIO_SEG_ES; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + port = ctxt->regs->dx & 0xffff; break; /* OUTS opcodes */ @@ -672,41 +675,43 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) case 0x6f: *exitinfo |= IOIO_TYPE_OUTS; *exitinfo |= IOIO_SEG_DS; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + port = ctxt->regs->dx & 0xffff; break; /* IN immediate opcodes */ case 0xe4: case 0xe5: *exitinfo |= IOIO_TYPE_IN; - *exitinfo |= (u8)insn->immediate.value << 16; + port = (u8)insn->immediate.value & 0xffff; break; /* OUT immediate opcodes */ case 0xe6: case 0xe7: *exitinfo |= IOIO_TYPE_OUT; - *exitinfo |= (u8)insn->immediate.value << 16; + port = (u8)insn->immediate.value & 0xffff; break; /* IN register opcodes */ case 0xec: case 0xed: *exitinfo |= IOIO_TYPE_IN; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + port = ctxt->regs->dx & 0xffff; break; /* OUT register opcodes */ case 0xee: case 0xef: *exitinfo |= IOIO_TYPE_OUT; - *exitinfo |= (ctxt->regs->dx & 0xffff) << 16; + port = ctxt->regs->dx & 0xffff; break; default: return ES_DECODE_FAILED; } + *exitinfo |= port << 16; + switch (insn->opcode.bytes[0]) { case 0x6c: case 0x6e: @@ -716,12 +721,15 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) case 0xee: /* Single byte opcodes */ *exitinfo |= IOIO_DATA_8; + size = 1; break; default: /* Length determined by instruction parsing */ *exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16 : IOIO_DATA_32; + size = (insn->opnd_bytes == 2) ? 2 : 4; } + switch (insn->addr_bytes) { case 2: *exitinfo |= IOIO_ADDR_16; @@ -737,7 +745,7 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) if (insn_has_rep_prefix(insn)) *exitinfo |= IOIO_REP; - return ES_OK; + return vc_ioio_check(ctxt, (u16)port, size); } static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 4ee14e647ae62b..0f218932e844c5 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -524,6 +524,33 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt return ES_OK; } +static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size) +{ + BUG_ON(size > 4); + + if (user_mode(ctxt->regs)) { + struct thread_struct *t = ¤t->thread; + struct io_bitmap *iobm = t->io_bitmap; + size_t idx; + + if (!iobm) + goto fault; + + for (idx = port; idx < port + size; ++idx) { + if (test_bit(idx, iobm->bitmap)) + goto fault; + } + } + + return ES_OK; + +fault: + ctxt->fi.vector = X86_TRAP_GP; + ctxt->fi.error_code = 0; + + return ES_EXCEPTION; +} + /* Include code shared with pre-decompression boot stage */ #include "sev-shared.c" From 1bba0badff0ede8dc51641cff4b153422baa3369 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 9 Oct 2023 16:34:12 +0100 Subject: [PATCH 103/661] ASoC: cs35l56: ASP1 DOUT must default to Hi-Z when not transmitting The ASP1 DOUT line must be defaulted to be high-impedance when it is not actually transmitting data for an active channel. In non-SoundWire modes ASP1 will usually be shared by multiple amps so each amp must only drive the line during the slot for an enabled TX channel. In SoundWire mode a custom firmware can use ASP1 as a secondary chip-to-chip audio link or as GPIO. It should be defaulted to high-impedance since by default the purpose of this pin is not known. Backport note: On kernel versions before 6.6 the cs35l56->base.regmap argument to regmap_set_bits() must be changed to cs35l56->regmap. Signed-off-by: Richard Fitzgerald Fixes: e49611252900 ("ASoC: cs35l56: Add driver for Cirrus Logic CS35L56") Link: https://lore.kernel.org/r/20231009153412.30380-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l56.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 9c2d9cbc63d375..f9059780b7a7b6 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -1186,6 +1186,12 @@ int cs35l56_init(struct cs35l56_private *cs35l56) /* Registers could be dirty after soft reset or SoundWire enumeration */ regcache_sync(cs35l56->base.regmap); + /* Set ASP1 DOUT to high-impedance when it is not transmitting audio data. */ + ret = regmap_set_bits(cs35l56->base.regmap, CS35L56_ASP1_CONTROL3, + CS35L56_ASP1_DOUT_HIZ_CTRL_MASK); + if (ret) + return dev_err_probe(cs35l56->base.dev, ret, "Failed to write ASP1_CONTROL3\n"); + cs35l56->base.init_done = true; complete(&cs35l56->init_completion); From 1558b1a8dd388f5fcc3abc1e24de854a295044c3 Mon Sep 17 00:00:00 2001 From: Hao Ge Date: Sun, 8 Oct 2023 11:29:08 +0800 Subject: [PATCH 104/661] firmware/imx-dsp: Fix use_after_free in imx_dsp_setup_channels() dsp_chan->name and chan_name points to same block of memory, because dev_err still needs to be used it,so we need free it's memory after use to avoid use_after_free. Fixes: e527adfb9b7d ("firmware: imx-dsp: Fix an error handling path in imx_dsp_setup_channels()") Signed-off-by: Hao Ge Reviewed-by: Daniel Baluta Signed-off-by: Shawn Guo --- drivers/firmware/imx/imx-dsp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/imx/imx-dsp.c b/drivers/firmware/imx/imx-dsp.c index 508eab346fc676..a48a58e0c61f8f 100644 --- a/drivers/firmware/imx/imx-dsp.c +++ b/drivers/firmware/imx/imx-dsp.c @@ -114,11 +114,11 @@ static int imx_dsp_setup_channels(struct imx_dsp_ipc *dsp_ipc) dsp_chan->idx = i % 2; dsp_chan->ch = mbox_request_channel_byname(cl, chan_name); if (IS_ERR(dsp_chan->ch)) { - kfree(dsp_chan->name); ret = PTR_ERR(dsp_chan->ch); if (ret != -EPROBE_DEFER) dev_err(dev, "Failed to request mbox chan %s ret %d\n", chan_name, ret); + kfree(dsp_chan->name); goto out; } From dad4e491e30b20f4dc615c9da65d2142d703b5c2 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Sat, 7 Oct 2023 08:59:53 +0800 Subject: [PATCH 105/661] net: ipv6: fix return value check in esp_remove_trailer In esp_remove_trailer(), to avoid an unexpected result returned by pskb_trim, we should check the return value of pskb_trim(). Signed-off-by: Ma Ke Signed-off-by: Steffen Klassert --- net/ipv6/esp6.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index fddd0cbdede15b..e023d29e919c17 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -770,7 +770,9 @@ static inline int esp_remove_trailer(struct sk_buff *skb) skb->csum = csum_block_sub(skb->csum, csumdiff, skb->len - trimlen); } - pskb_trim(skb, skb->len - trimlen); + ret = pskb_trim(skb, skb->len - trimlen); + if (unlikely(ret)) + return ret; ret = nexthdr[1]; From 513f61e2193350c7a345da98559b80f61aec4fa6 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Mon, 9 Oct 2023 09:13:37 +0800 Subject: [PATCH 106/661] net: ipv4: fix return value check in esp_remove_trailer In esp_remove_trailer(), to avoid an unexpected result returned by pskb_trim, we should check the return value of pskb_trim(). Signed-off-by: Ma Ke Signed-off-by: Steffen Klassert --- net/ipv4/esp4.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 2be2d492255739..d18f0f092fe73d 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -732,7 +732,9 @@ static inline int esp_remove_trailer(struct sk_buff *skb) skb->csum = csum_block_sub(skb->csum, csumdiff, skb->len - trimlen); } - pskb_trim(skb, skb->len - trimlen); + ret = pskb_trim(skb, skb->len - trimlen); + if (unlikely(ret)) + return ret; ret = nexthdr[1]; From 53ba32acb5ab137ba333c20e0c987bdd6273a366 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 10 Oct 2023 11:24:24 +0100 Subject: [PATCH 107/661] ASoC: dt-bindings: cirrus,cs42l43: Update values for bias sense Due to an error in the datasheet the bias sense values currently don't match the hardware. Whilst this is a change to the binding no devices have yet shipped so updating the binding will not cause any issues. Acked-by: Krzysztof Kozlowski Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20231010102425.3662364-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml b/Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml index 7a6de938b11d10..4118aa54bbd553 100644 --- a/Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml +++ b/Documentation/devicetree/bindings/sound/cirrus,cs42l43.yaml @@ -82,7 +82,7 @@ properties: description: Current at which the headset micbias sense clamp will engage, 0 to disable. - enum: [ 0, 14, 23, 41, 50, 60, 68, 86, 95 ] + enum: [ 0, 14, 24, 43, 52, 61, 71, 90, 99 ] default: 0 cirrus,bias-ramp-ms: From 99d426c6dd2d6f9734617ec12def856ee35b9218 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 10 Oct 2023 11:24:25 +0100 Subject: [PATCH 108/661] ASoC: cs42l43: Update values for bias sense Due to an error in the datasheet the bias sense values currently don't match the hardware. Whilst this is a change to the binding no devices have yet shipped so updating the binding will not cause any issues. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20231010102425.3662364-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l43-jack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l43-jack.c b/sound/soc/codecs/cs42l43-jack.c index 92e37bc1df9dcc..9f5f1a92561d1e 100644 --- a/sound/soc/codecs/cs42l43-jack.c +++ b/sound/soc/codecs/cs42l43-jack.c @@ -34,7 +34,7 @@ static const unsigned int cs42l43_accdet_db_ms[] = { static const unsigned int cs42l43_accdet_ramp_ms[] = { 10, 40, 90, 170 }; static const unsigned int cs42l43_accdet_bias_sense[] = { - 14, 23, 41, 50, 60, 68, 86, 95, 0, + 14, 24, 43, 52, 61, 71, 90, 99, 0, }; static int cs42l43_find_index(struct cs42l43_codec *priv, const char * const prop, From d920abd1e7c4884f9ecd0749d1921b7ab19ddfbd Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 2 Oct 2023 13:54:28 +0300 Subject: [PATCH 109/661] nvmet-tcp: Fix a possible UAF in queue intialization setup From Alon: "Due to a logical bug in the NVMe-oF/TCP subsystem in the Linux kernel, a malicious user can cause a UAF and a double free, which may lead to RCE (may also lead to an LPE in case the attacker already has local privileges)." Hence, when a queue initialization fails after the ahash requests are allocated, it is guaranteed that the queue removal async work will be called, hence leave the deallocation to the queue removal. Also, be extra careful not to continue processing the socket, so set queue rcv_state to NVMET_TCP_RECV_ERR upon a socket error. Cc: stable@vger.kernel.org Reported-by: Alon Zahavi Tested-by: Alon Zahavi Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index cd92d7ddf5ed15..197fc2ecb164dc 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -372,6 +372,7 @@ static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status) { + queue->rcv_state = NVMET_TCP_RECV_ERR; if (status == -EPIPE || status == -ECONNRESET) kernel_sock_shutdown(queue->sock, SHUT_RDWR); else @@ -910,15 +911,11 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) iov.iov_len = sizeof(*icresp); ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); if (ret < 0) - goto free_crypto; + return ret; /* queue removal will cleanup */ queue->state = NVMET_TCP_Q_LIVE; nvmet_prepare_receive_pdu(queue); return 0; -free_crypto: - if (queue->hdr_digest || queue->data_digest) - nvmet_tcp_free_crypto(queue); - return ret; } static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, From 4ae55a7dce04989f289d5c5c8c8e5c37adc36c71 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Mon, 4 Sep 2023 17:26:38 +0200 Subject: [PATCH 110/661] nvme-auth: use chap->s2 to indicate bidirectional authentication Commit 546dea18c999 ("nvme-auth: check chap ctrl_key once constructed") replaced the condition "if (ctrl->ctrl_key)" (indicating bidirectional auth) by "if (chap->ctrl_key)", because ctrl->ctrl_key is a resource shared with sysfs. But chap->ctrl_key is set in nvme_auth_process_dhchap_challenge() depending on the DHVLEN in the DH-HMAC-CHAP Challenge message received from the controller, and will thus be non-NULL for every DH-HMAC-CHAP exchange, even if unidirectional auth was requested. This will lead to a protocol violation by sending a Success2 message in the unidirectional case (per NVMe base spec 2.0, the authentication transaction ends after the Success1 message for unidirectional auth). Use chap->s2 instead, which is non-zero if and only if the host requested bi-directional authentication from the controller. Fixes: 546dea18c999 ("nvme-auth: check chap ctrl_key once constructed") Signed-off-by: Martin Wilck Reviewed-by: Daniel Wagner Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/auth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index daf5d144a8eaf7..064592a5d546a6 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -341,7 +341,7 @@ static int nvme_auth_process_dhchap_success1(struct nvme_ctrl *ctrl, struct nvmf_auth_dhchap_success1_data *data = chap->buf; size_t size = sizeof(*data); - if (chap->ctrl_key) + if (chap->s2) size += chap->hash_len; if (size > CHAP_BUF_SIZE) { @@ -825,7 +825,7 @@ static void nvme_queue_auth_work(struct work_struct *work) goto fail2; } - if (chap->ctrl_key) { + if (chap->s2) { /* DH-HMAC-CHAP Step 5: send success2 */ dev_dbg(ctrl->device, "%s: qid %d send success2\n", __func__, chap->qid); From 7b695ef6696e248aad02b17ca3ba088db2d59c31 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 5 Oct 2023 10:36:50 +0200 Subject: [PATCH 111/661] dt-bindings: iio: add missing reset-gpios constrain The Documentation/devicetree/bindings/gpio/gpio-consumer-common.yaml schema does not enforce number of reset GPIOs, thus each device binding must do it. Signed-off-by: Krzysztof Kozlowski Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20231005083650.92222-1-krzysztof.kozlowski@linaro.org Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml | 3 ++- Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml | 3 ++- Documentation/devicetree/bindings/iio/health/ti,afe4403.yaml | 3 ++- Documentation/devicetree/bindings/iio/health/ti,afe4404.yaml | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml b/Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml index 2594fa192f93d0..2a04906531fb0e 100644 --- a/Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml +++ b/Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml @@ -32,7 +32,8 @@ properties: spi-cpol: true - reset-gpios: true + reset-gpios: + maxItems: 1 interrupts: minItems: 1 diff --git a/Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml b/Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml index 4e508bfcc9d87e..5121685337b5ed 100644 --- a/Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml +++ b/Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml @@ -78,7 +78,8 @@ properties: - const: -1000 - const: 22000 - reset-gpios: true + reset-gpios: + maxItems: 1 adi,dc-dc-ilim-microamp: enum: [150000, 200000, 250000, 300000, 350000, 400000] diff --git a/Documentation/devicetree/bindings/iio/health/ti,afe4403.yaml b/Documentation/devicetree/bindings/iio/health/ti,afe4403.yaml index b9b5beac33b2e9..5b6cde86b5a5cb 100644 --- a/Documentation/devicetree/bindings/iio/health/ti,afe4403.yaml +++ b/Documentation/devicetree/bindings/iio/health/ti,afe4403.yaml @@ -23,7 +23,8 @@ properties: maxItems: 1 description: Connected to ADC_RDY pin. - reset-gpios: true + reset-gpios: + maxItems: 1 required: - compatible diff --git a/Documentation/devicetree/bindings/iio/health/ti,afe4404.yaml b/Documentation/devicetree/bindings/iio/health/ti,afe4404.yaml index 2958c4ca75b485..167d10bd60af97 100644 --- a/Documentation/devicetree/bindings/iio/health/ti,afe4404.yaml +++ b/Documentation/devicetree/bindings/iio/health/ti,afe4404.yaml @@ -23,7 +23,8 @@ properties: maxItems: 1 description: Connected to ADC_RDY pin. - reset-gpios: true + reset-gpios: + maxItems: 1 additionalProperties: false From 54f1840ddee9bbdc8dd89fbbfdfa632401244146 Mon Sep 17 00:00:00 2001 From: Jian Zhang Date: Fri, 6 Oct 2023 10:22:33 +0800 Subject: [PATCH 112/661] i2c: aspeed: Fix i2c bus hang in slave read When the `CONFIG_I2C_SLAVE` option is enabled and the device operates as a slave, a situation arises where the master sends a START signal without the accompanying STOP signal. This action results in a persistent I2C bus timeout. The core issue stems from the fact that the i2c controller remains in a slave read state without a timeout mechanism. As a consequence, the bus perpetually experiences timeouts. In this case, the i2c bus will be reset, but the slave_state reset is missing. Fixes: fee465150b45 ("i2c: aspeed: Reset the i2c controller when timeout occurs") Signed-off-by: Jian Zhang Acked-by: Andi Shyti Tested-by: Andrew Jeffery Reviewed-by: Andrew Jeffery Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-aspeed.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index 5a416b39b81836..28e2a5fc45282d 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -749,6 +749,8 @@ static void __aspeed_i2c_reg_slave(struct aspeed_i2c_bus *bus, u16 slave_addr) func_ctrl_reg_val = readl(bus->base + ASPEED_I2C_FUN_CTRL_REG); func_ctrl_reg_val |= ASPEED_I2CD_SLAVE_EN; writel(func_ctrl_reg_val, bus->base + ASPEED_I2C_FUN_CTRL_REG); + + bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE; } static int aspeed_i2c_reg_slave(struct i2c_client *client) @@ -765,7 +767,6 @@ static int aspeed_i2c_reg_slave(struct i2c_client *client) __aspeed_i2c_reg_slave(bus, client->addr); bus->slave = client; - bus->slave_state = ASPEED_I2C_SLAVE_INACTIVE; spin_unlock_irqrestore(&bus->lock, flags); return 0; From 4d73c6772ab771cbbe7e46a73e7c78ba490350fa Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 4 Oct 2023 11:19:15 -0700 Subject: [PATCH 113/661] platform/x86: intel-uncore-freq: Conditionally create attribute for read frequency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the current uncore frequency can't be read, don't create attribute "current_freq_khz" as any read will fail later. Some user space applications like turbostat fail to continue with the failure. So, check error during attribute creation. Fixes: 414eef27283a ("platform/x86/intel/uncore-freq: Display uncore current frequency") Signed-off-by: Srinivas Pandruvada Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20231004181915.1887913-1-srinivas.pandruvada@linux.intel.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- .../x86/intel/uncore-frequency/uncore-frequency-common.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c index 1152deaa0078e9..33ab207493e3e6 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c @@ -176,7 +176,7 @@ show_uncore_data(initial_max_freq_khz); static int create_attr_group(struct uncore_data *data, char *name) { - int ret, index = 0; + int ret, freq, index = 0; init_attribute_rw(max_freq_khz); init_attribute_rw(min_freq_khz); @@ -197,7 +197,11 @@ static int create_attr_group(struct uncore_data *data, char *name) data->uncore_attrs[index++] = &data->min_freq_khz_dev_attr.attr; data->uncore_attrs[index++] = &data->initial_min_freq_khz_dev_attr.attr; data->uncore_attrs[index++] = &data->initial_max_freq_khz_dev_attr.attr; - data->uncore_attrs[index++] = &data->current_freq_khz_dev_attr.attr; + + ret = uncore_read_freq(data, &freq); + if (!ret) + data->uncore_attrs[index++] = &data->current_freq_khz_dev_attr.attr; + data->uncore_attrs[index] = NULL; data->uncore_attr_group.name = name; From 6284e67aa6cb3af870ed11dfcfafd80fd927777b Mon Sep 17 00:00:00 2001 From: Nikita Kravets Date: Fri, 6 Oct 2023 20:53:53 +0300 Subject: [PATCH 114/661] platform/x86: msi-ec: Fix the 3rd config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the charge control address of CONF3 and remove an incorrect firmware version which turned out to be a BIOS firmware and not an EC firmware. Fixes: 392cacf2aa10 ("platform/x86: Add new msi-ec driver") Cc: Aakash Singh Cc: Jose Angel Pastrana Signed-off-by: Nikita Kravets Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20231006175352.1753017-5-teackot@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/msi-ec.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/platform/x86/msi-ec.c b/drivers/platform/x86/msi-ec.c index f26a3121092f97..492eb383ee7a9c 100644 --- a/drivers/platform/x86/msi-ec.c +++ b/drivers/platform/x86/msi-ec.c @@ -276,14 +276,13 @@ static struct msi_ec_conf CONF2 __initdata = { static const char * const ALLOWED_FW_3[] __initconst = { "1592EMS1.111", - "E1592IMS.10C", NULL }; static struct msi_ec_conf CONF3 __initdata = { .allowed_fw = ALLOWED_FW_3, .charge_control = { - .address = 0xef, + .address = 0xd7, .offset_start = 0x8a, .offset_end = 0x80, .range_min = 0x8a, From 51064b7acf665bfa2c929d7cafb7ad494b7d2783 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sun, 8 Oct 2023 01:39:28 +0200 Subject: [PATCH 115/661] platform/x86: wmi: Update MAINTAINERS entry Since 2011, the WMI subsystem is marked as orphaned, which means that a important part of platform support for modern notebooks and even desktops is receiving not enough maintenance. So i decided to take over the maintenance of the WMI subsystem. Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20231007233933.72121-2-W_Armin@gmx.de Acked-by: Hans de Goede Signed-off-by: Hans de Goede --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index dbf1668dcd842e..b38fcbaa24f95b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -378,8 +378,9 @@ F: drivers/acpi/viot.c F: include/linux/acpi_viot.h ACPI WMI DRIVER +M: Armin Wolf L: platform-driver-x86@vger.kernel.org -S: Orphan +S: Maintained F: Documentation/driver-api/wmi.rst F: Documentation/wmi/ F: drivers/platform/x86/wmi.c From c15cdea517414e0b29a11e0a0e2443d127c9109b Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 6 Oct 2023 17:39:34 +0100 Subject: [PATCH 116/661] mm: slab: Do not create kmalloc caches smaller than arch_slab_minalign() Commit b035f5a6d852 ("mm: slab: reduce the kmalloc() minimum alignment if DMA bouncing possible") allows architectures with non-coherent DMA to define a small ARCH_KMALLOC_MINALIGN (e.g. sizeof(unsigned long long)) and this has been enabled on arm64. With KASAN_HW_TAGS enabled, however, ARCH_SLAB_MINALIGN becomes 16 on arm64 (arch_slab_minalign() dynamically selects it since commit d949a8155d13 ("mm: make minimum slab alignment a runtime property")). This can lead to a situation where kmalloc-8 caches are attempted to be created with a kmem_caches.size aligned to 16. When the cache is mergeable, it can lead to kernel warnings like: sysfs: cannot create duplicate filename '/kernel/slab/:d-0000016' CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.6.0-rc1-00001-gda98843cd306-dirty #5 Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 Call trace: dump_backtrace+0x90/0xe8 show_stack+0x18/0x24 dump_stack_lvl+0x48/0x60 dump_stack+0x18/0x24 sysfs_warn_dup+0x64/0x80 sysfs_create_dir_ns+0xe8/0x108 kobject_add_internal+0x98/0x264 kobject_init_and_add+0x8c/0xd8 sysfs_slab_add+0x12c/0x248 slab_sysfs_init+0x98/0x14c do_one_initcall+0x6c/0x1b0 kernel_init_freeable+0x1c0/0x288 kernel_init+0x24/0x1e0 ret_from_fork+0x10/0x20 kobject: kobject_add_internal failed for :d-0000016 with -EEXIST, don't try to register things with the same name in the same directory. SLUB: Unable to add boot slab dma-kmalloc-8 to sysfs Limit the __kmalloc_minalign() return value (used to create the kmalloc-* caches) to arch_slab_minalign() so that kmalloc-8 caches are skipped when KASAN_HW_TAGS is enabled (both config and runtime). Reported-by: Mark Rutland Fixes: b035f5a6d852 ("mm: slab: reduce the kmalloc() minimum alignment if DMA bouncing possible") Signed-off-by: Catalin Marinas Cc: Peter Collingbourne Cc: stable@vger.kernel.org # 6.5.x Signed-off-by: Vlastimil Babka --- mm/slab_common.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 8fda308e400db8..9bbffe82d65af1 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -895,10 +895,13 @@ void __init setup_kmalloc_cache_index_table(void) static unsigned int __kmalloc_minalign(void) { + unsigned int minalign = dma_get_cache_alignment(); + if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && is_swiotlb_allocated()) - return ARCH_KMALLOC_MINALIGN; - return dma_get_cache_alignment(); + minalign = ARCH_KMALLOC_MINALIGN; + + return max(minalign, arch_slab_minalign()); } void __init From f588d72bd95f748849685412b1f0c7959ca228cf Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Mon, 18 Sep 2023 23:30:20 -0700 Subject: [PATCH 117/661] nfs42: client needs to strip file mode's suid/sgid bit after ALLOCATE op The Linux NFS server strips the SUID and SGID from the file mode on ALLOCATE op. Modify _nfs42_proc_fallocate to add NFS_INO_REVAL_FORCED to nfs_set_cache_invalid's argument to force update of the file mode suid/sgid bit. Suggested-by: Trond Myklebust Signed-off-by: Dai Ngo Reviewed-by: Jeff Layton Tested-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/nfs42proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 063e00aff87edb..28704f924612c4 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -81,7 +81,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, if (status == 0) { if (nfs_should_remove_suid(inode)) { spin_lock(&inode->i_lock); - nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE); + nfs_set_cache_invalid(inode, + NFS_INO_REVAL_FORCED | NFS_INO_INVALID_MODE); spin_unlock(&inode->i_lock); } status = nfs_post_op_update_inode_force_wcc(inode, From 91d20ab9d9ca035527af503d00e1e30d6c375f2a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 9 Oct 2023 10:18:01 +0200 Subject: [PATCH 118/661] wifi: cfg80211: use system_unbound_wq for wiphy work Since wiphy work items can run pretty much arbitrary code in the stack/driver, it can take longer to run all of this, so we shouldn't be using system_wq via schedule_work(). Also, we lock the wiphy (which is the reason this exists), so use system_unbound_wq. Reported-and-tested-by: Kalle Valo Fixes: a3ee4dc84c4e ("wifi: cfg80211: add a work abstraction with special semantics") Signed-off-by: Johannes Berg --- net/wireless/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/core.c b/net/wireless/core.c index 64e8616171104f..acec41c1809a82 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1622,7 +1622,7 @@ void wiphy_work_queue(struct wiphy *wiphy, struct wiphy_work *work) list_add_tail(&work->entry, &rdev->wiphy_work_list); spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags); - schedule_work(&rdev->wiphy_work); + queue_work(system_unbound_wq, &rdev->wiphy_work); } EXPORT_SYMBOL_GPL(wiphy_work_queue); From 02e0e426a2fb1446ebd7bc0eccfb48aedefb966b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Oct 2023 23:09:18 +0200 Subject: [PATCH 119/661] wifi: mac80211: fix error path key leak In the previous key leak fix for the other error paths, I meant to unify all of them to the same place, but used the wrong label, which I noticed when doing the merge into wireless-next. Fix it. Fixes: d097ae01ebd4 ("wifi: mac80211: fix potential key leak") Signed-off-by: Johannes Berg --- net/mac80211/key.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 0665ff5e456eb4..a2db0585dce0d4 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -912,7 +912,7 @@ int ieee80211_key_link(struct ieee80211_key *key, */ if (ieee80211_key_identical(sdata, old_key, key)) { ret = -EALREADY; - goto unlock; + goto out; } key->local = sdata->local; @@ -940,7 +940,6 @@ int ieee80211_key_link(struct ieee80211_key *key, out: ieee80211_key_free_unused(key); - unlock: mutex_unlock(&sdata->local->key_mtx); return ret; From b2f750c3a80b285cd60c9346f8c96bd0a2a66cde Mon Sep 17 00:00:00 2001 From: Josua Mayer Date: Wed, 4 Oct 2023 18:39:28 +0200 Subject: [PATCH 120/661] net: rfkill: gpio: prevent value glitch during probe When either reset- or shutdown-gpio have are initially deasserted, e.g. after a reboot - or when the hardware does not include pull-down, there will be a short toggle of both IOs to logical 0 and back to 1. It seems that the rfkill default is unblocked, so the driver should not glitch to output low during probe. It can lead e.g. to unexpected lte modem reconnect: [1] root@localhost:~# dmesg | grep "usb 2-1" [ 2.136124] usb 2-1: new SuperSpeed USB device number 2 using xhci-hcd [ 21.215278] usb 2-1: USB disconnect, device number 2 [ 28.833977] usb 2-1: new SuperSpeed USB device number 3 using xhci-hcd The glitch has been discovered on an arm64 board, now that device-tree support for the rfkill-gpio driver has finally appeared :). Change the flags for devm_gpiod_get_optional from GPIOD_OUT_LOW to GPIOD_ASIS to avoid any glitches. The rfkill driver will set the intended value during rfkill_sync_work. Fixes: 7176ba23f8b5 ("net: rfkill: add generic gpio rfkill driver") Signed-off-by: Josua Mayer Link: https://lore.kernel.org/r/20231004163928.14609-1-josua@solid-run.com Signed-off-by: Johannes Berg --- net/rfkill/rfkill-gpio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index e9d1b2f2ff0ad5..5a81505fba9ac4 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -108,13 +108,13 @@ static int rfkill_gpio_probe(struct platform_device *pdev) rfkill->clk = devm_clk_get(&pdev->dev, NULL); - gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_OUT_LOW); + gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_ASIS); if (IS_ERR(gpio)) return PTR_ERR(gpio); rfkill->reset_gpio = gpio; - gpio = devm_gpiod_get_optional(&pdev->dev, "shutdown", GPIOD_OUT_LOW); + gpio = devm_gpiod_get_optional(&pdev->dev, "shutdown", GPIOD_ASIS); if (IS_ERR(gpio)) return PTR_ERR(gpio); From f2ac54ebf85615a6d78f5eb213a8bbeeb17ebe5d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 11 Oct 2023 16:55:10 +0200 Subject: [PATCH 121/661] net: rfkill: reduce data->mtx scope in rfkill_fop_open In syzbot runs, lockdep reports that there's a (potential) deadlock here of data->mtx being locked recursively. This isn't really a deadlock since they are different instances, but lockdep cannot know, and teaching it would be far more difficult than other fixes. At the same time we don't even really _need_ the mutex to be locked in rfkill_fop_open(), since we're modifying only a completely fresh instance of 'data' (struct rfkill_data) that's not yet added to the global list. However, to avoid any reordering etc. within the globally locked section, and to make the code look more symmetric, we should still lock the data->events list manipulation, but also need to lock _only_ that. So do that. Reported-by: syzbot+509238e523e032442b80@syzkaller.appspotmail.com Fixes: 2c3dfba4cf84 ("rfkill: sync before userspace visibility/changes") Signed-off-by: Johannes Berg --- net/rfkill/core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 08630896b6c887..14cc8fe8584bd1 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -1180,7 +1180,6 @@ static int rfkill_fop_open(struct inode *inode, struct file *file) init_waitqueue_head(&data->read_wait); mutex_lock(&rfkill_global_mutex); - mutex_lock(&data->mtx); /* * start getting events from elsewhere but hold mtx to get * startup events added first @@ -1192,10 +1191,11 @@ static int rfkill_fop_open(struct inode *inode, struct file *file) goto free; rfkill_sync(rfkill); rfkill_fill_event(&ev->ev, rfkill, RFKILL_OP_ADD); + mutex_lock(&data->mtx); list_add_tail(&ev->list, &data->events); + mutex_unlock(&data->mtx); } list_add(&data->list, &rfkill_fds); - mutex_unlock(&data->mtx); mutex_unlock(&rfkill_global_mutex); file->private_data = data; @@ -1203,7 +1203,6 @@ static int rfkill_fop_open(struct inode *inode, struct file *file) return stream_open(inode, file); free: - mutex_unlock(&data->mtx); mutex_unlock(&rfkill_global_mutex); mutex_destroy(&data->mtx); list_for_each_entry_safe(ev, tmp, &data->events, list) From 6a6d4644ce935ddec4f76223ac0ca68da56bd2d3 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Wed, 11 Oct 2023 10:43:26 -0400 Subject: [PATCH 122/661] NFS: Fix potential oops in nfs_inode_remove_request() Once a folio's private data has been cleared, it's possible for another process to clear the folio->mapping (e.g. via invalidate_complete_folio2 or evict_mapping_folio), so it wouldn't be safe to call nfs_page_to_inode() after that. Fixes: 0c493b5cf16e ("NFS: Convert buffered writes to use folios") Signed-off-by: Scott Mayhew Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7720b5e43014b4..9d82d50ce0b12d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -788,6 +788,8 @@ static void nfs_inode_add_request(struct nfs_page *req) */ static void nfs_inode_remove_request(struct nfs_page *req) { + struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req)); + if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { struct folio *folio = nfs_page_to_folio(req->wb_head); struct address_space *mapping = folio_file_mapping(folio); @@ -802,7 +804,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) } if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) { - atomic_long_dec(&NFS_I(nfs_page_to_inode(req))->nrequests); + atomic_long_dec(&nfsi->nrequests); nfs_release_request(req); } } From d6cbc6a3a856a7d8047316d81e2e039e44432acb Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 11 Oct 2023 14:48:53 +0100 Subject: [PATCH 123/661] ASoC: cs42l42: Fix missing include of gpio/consumer.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call to gpiod_set_value_cansleep() in cs42l42_sdw_update_status() needs the header file gpio/consumer.h to be included. This was introduced by commit 2d066c6a7865 ("ASoC: cs42l42: Avoid stale SoundWire ATTACH after hard reset") and caused error: sound/soc/codecs/cs42l42-sdw.c:374:4: error: implicit declaration of function ‘gpiod_set_value_cansleep’; did you mean gpio_set_value_cansleep’? Signed-off-by: Richard Fitzgerald Fixes: 2d066c6a7865 ("ASoC: cs42l42: Avoid stale SoundWire ATTACH after hard reset") Link: https://lore.kernel.org/r/20231011134853.20059-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42-sdw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/cs42l42-sdw.c b/sound/soc/codecs/cs42l42-sdw.c index 974bae4abfad1c..94a66a325303b6 100644 --- a/sound/soc/codecs/cs42l42-sdw.c +++ b/sound/soc/codecs/cs42l42-sdw.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include From 92d4abd66f7080075793970fc8f241239e58a9e7 Mon Sep 17 00:00:00 2001 From: Arkadiusz Bokowy Date: Wed, 20 Sep 2023 17:30:07 +0200 Subject: [PATCH 124/661] Bluetooth: vhci: Fix race when opening vhci device When the vhci device is opened in the two-step way, i.e.: open device then write a vendor packet with requested controller type, the device shall respond with a vendor packet which includes HCI index of created interface. When the virtual HCI is created, the host sends a reset request to the controller. This request is processed by the vhci_send_frame() function. However, this request is send by a different thread, so it might happen that this HCI request will be received before the vendor response is queued in the read queue. This results in the HCI vendor response and HCI reset request inversion in the read queue which leads to improper behavior of btvirt: > dmesg [1754256.640122] Bluetooth: MGMT ver 1.22 [1754263.023806] Bluetooth: MGMT ver 1.22 [1754265.043775] Bluetooth: hci1: Opcode 0x c03 failed: -110 In order to synchronize vhci two-step open/setup process with virtual HCI initialization, this patch adds internal lock when queuing data in the vhci_send_frame() function. Signed-off-by: Arkadiusz Bokowy Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_vhci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index 40e2b9fa11a261..f3892e9ce800ff 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -74,7 +74,10 @@ static int vhci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) struct vhci_data *data = hci_get_drvdata(hdev); memcpy(skb_push(skb, 1), &hci_skb_pkt_type(skb), 1); + + mutex_lock(&data->open_mutex); skb_queue_tail(&data->readq, skb); + mutex_unlock(&data->open_mutex); wake_up_interruptible(&data->read_wait); return 0; From acab8ff29a2a226409cfe04e6d2e0896928c1b3a Mon Sep 17 00:00:00 2001 From: Iulia Tanasescu Date: Thu, 28 Sep 2023 10:52:57 +0300 Subject: [PATCH 125/661] Bluetooth: ISO: Fix invalid context error This moves the hci_le_terminate_big_sync call from rx_work to cmd_sync_work, to avoid calling sleeping function from an invalid context. Reported-by: syzbot+c715e1bd8dfbcb1ab176@syzkaller.appspotmail.com Fixes: a0bfde167b50 ("Bluetooth: ISO: Add support for connecting multiple BISes") Signed-off-by: Iulia Tanasescu Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 31d02b54eea119..e6cfc65abcb865 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -7021,6 +7021,14 @@ static void hci_le_cis_req_evt(struct hci_dev *hdev, void *data, hci_dev_unlock(hdev); } +static int hci_iso_term_big_sync(struct hci_dev *hdev, void *data) +{ + u8 handle = PTR_UINT(data); + + return hci_le_terminate_big_sync(hdev, handle, + HCI_ERROR_LOCAL_HOST_TERM); +} + static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -7065,16 +7073,17 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data, rcu_read_lock(); } + rcu_read_unlock(); + if (!ev->status && !i) /* If no BISes have been connected for the BIG, * terminate. This is in case all bound connections * have been closed before the BIG creation * has completed. */ - hci_le_terminate_big_sync(hdev, ev->handle, - HCI_ERROR_LOCAL_HOST_TERM); + hci_cmd_sync_queue(hdev, hci_iso_term_big_sync, + UINT_PTR(ev->handle), NULL); - rcu_read_unlock(); hci_dev_unlock(hdev); } From 33155c4aae5260475def6f7438e4e35564f4f3ba Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Sun, 1 Oct 2023 16:59:31 +0800 Subject: [PATCH 126/661] Bluetooth: hci_event: Ignore NULL link key This change is used to relieve CVE-2020-26555. The description of the CVE: Bluetooth legacy BR/EDR PIN code pairing in Bluetooth Core Specification 1.0B through 5.2 may permit an unauthenticated nearby device to spoof the BD_ADDR of the peer device to complete pairing without knowledge of the PIN. [1] The detail of this attack is in IEEE paper: BlueMirror: Reflections on Bluetooth Pairing and Provisioning Protocols [2] It's a reflection attack. The paper mentioned that attacker can induce the attacked target to generate null link key (zero key) without PIN code. In BR/EDR, the key generation is actually handled in the controller which is below HCI. Thus, we can ignore null link key in the handler of "Link Key Notification event" to relieve the attack. A similar implementation also shows in btstack project. [3] v3: Drop the connection when null link key be detected. v2: - Used Link: tag instead of Closes: - Used bt_dev_dbg instead of BT_DBG - Added Fixes: tag Cc: stable@vger.kernel.org Fixes: 55ed8ca10f35 ("Bluetooth: Implement link key handling for the management interface") Link: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-26555 [1] Link: https://ieeexplore.ieee.org/abstract/document/9474325/authors#authors [2] Link: https://github.com/bluekitchen/btstack/blob/master/src/hci.c#L3722 [3] Signed-off-by: Lee, Chun-Yi Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index e6cfc65abcb865..742dcfd136bc45 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4742,6 +4742,15 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, void *data, if (!conn) goto unlock; + /* Ignore NULL link key against CVE-2020-26555 */ + if (!memcmp(ev->link_key, ZERO_KEY, HCI_LINK_KEY_SIZE)) { + bt_dev_dbg(hdev, "Ignore NULL link key (ZERO KEY) for %pMR", + &ev->bdaddr); + hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); + hci_conn_drop(conn); + goto unlock; + } + hci_conn_hold(conn); conn->disc_timeout = HCI_DISCONN_TIMEOUT; hci_conn_drop(conn); From 1ffc6f8cc33268731fcf9629fc4438f6db1191fc Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Sun, 1 Oct 2023 16:59:58 +0800 Subject: [PATCH 127/661] Bluetooth: Reject connection with the device which has same BD_ADDR This change is used to relieve CVE-2020-26555. The description of the CVE: Bluetooth legacy BR/EDR PIN code pairing in Bluetooth Core Specification 1.0B through 5.2 may permit an unauthenticated nearby device to spoof the BD_ADDR of the peer device to complete pairing without knowledge of the PIN. [1] The detail of this attack is in IEEE paper: BlueMirror: Reflections on Bluetooth Pairing and Provisioning Protocols [2] It's a reflection attack. The paper mentioned that attacker can induce the attacked target to generate null link key (zero key) without PIN code. In BR/EDR, the key generation is actually handled in the controller which is below HCI. A condition of this attack is that attacker should change the BR_ADDR of his hacking device (Host B) to equal to the BR_ADDR with the target device being attacked (Host A). Thus, we reject the connection with device which has same BD_ADDR both on HCI_Create_Connection and HCI_Connection_Request to prevent the attack. A similar implementation also shows in btstack project. [3][4] Cc: stable@vger.kernel.org Link: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-26555 [1] Link: https://ieeexplore.ieee.org/abstract/document/9474325/authors#authors [2] Link: https://github.com/bluekitchen/btstack/blob/master/src/hci.c#L3523 [3] Link: https://github.com/bluekitchen/btstack/blob/master/src/hci.c#L7297 [4] Signed-off-by: Lee, Chun-Yi Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 9 +++++++++ net/bluetooth/hci_event.c | 11 +++++++++++ 2 files changed, 20 insertions(+) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 7a6f20338db8ef..73470cc3518a71 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1627,6 +1627,15 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, return ERR_PTR(-EOPNOTSUPP); } + /* Reject outgoing connection to device with same BD ADDR against + * CVE-2020-26555 + */ + if (!bacmp(&hdev->bdaddr, dst)) { + bt_dev_dbg(hdev, "Reject connection with same BD_ADDR %pMR\n", + dst); + return ERR_PTR(-ECONNREFUSED); + } + acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); if (!acl) { acl = hci_conn_add(hdev, ACL_LINK, dst, HCI_ROLE_MASTER); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 742dcfd136bc45..61b1b244595e4a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3268,6 +3268,17 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "bdaddr %pMR type 0x%x", &ev->bdaddr, ev->link_type); + /* Reject incoming connection from device with same BD ADDR against + * CVE-2020-26555 + */ + if (!bacmp(&hdev->bdaddr, &ev->bdaddr)) + { + bt_dev_dbg(hdev, "Reject connection with same BD_ADDR %pMR\n", + &ev->bdaddr); + hci_reject_conn(hdev, &ev->bdaddr); + return; + } + mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type, &flags); From a239110ee8e0b0aafa265f0d54f7a16744855e70 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sat, 30 Sep 2023 15:53:32 +0300 Subject: [PATCH 128/661] Bluetooth: hci_sync: always check if connection is alive before deleting In hci_abort_conn_sync it is possible that conn is deleted concurrently by something else, also e.g. when waiting for hdev->lock. This causes double deletion of the conn, so UAF or conn_hash.list corruption. Fix by having all code paths check that the connection is still in conn_hash before deleting it, while holding hdev->lock which prevents any races. Log (when powering off while BAP streaming, occurs rarely): ======================================================================= kernel BUG at lib/list_debug.c:56! ... ? __list_del_entry_valid (lib/list_debug.c:56) hci_conn_del (net/bluetooth/hci_conn.c:154) bluetooth hci_abort_conn_sync (net/bluetooth/hci_sync.c:5415) bluetooth ? __pfx_hci_abort_conn_sync+0x10/0x10 [bluetooth] ? lock_release+0x1d5/0x3c0 ? hci_disconnect_all_sync.constprop.0+0xb2/0x230 [bluetooth] ? __pfx_lock_release+0x10/0x10 ? __kmem_cache_free+0x14d/0x2e0 hci_disconnect_all_sync.constprop.0+0xda/0x230 [bluetooth] ? __pfx_hci_disconnect_all_sync.constprop.0+0x10/0x10 [bluetooth] ? hci_clear_adv_sync+0x14f/0x170 [bluetooth] ? __pfx_set_powered_sync+0x10/0x10 [bluetooth] hci_set_powered_sync+0x293/0x450 [bluetooth] ======================================================================= Fixes: 94d9ba9f9888 ("Bluetooth: hci_sync: Fix UAF in hci_disconnect_all_sync") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index d06e07a0ea5a98..a15ab0b874a9d5 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5369,6 +5369,7 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) { int err = 0; u16 handle = conn->handle; + bool disconnect = false; struct hci_conn *c; switch (conn->state) { @@ -5399,24 +5400,15 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) hci_dev_unlock(hdev); return 0; case BT_BOUND: - hci_dev_lock(hdev); - hci_conn_failed(conn, reason); - hci_dev_unlock(hdev); - return 0; + break; default: - hci_dev_lock(hdev); - conn->state = BT_CLOSED; - hci_disconn_cfm(conn, reason); - hci_conn_del(conn); - hci_dev_unlock(hdev); - return 0; + disconnect = true; + break; } hci_dev_lock(hdev); - /* Check if the connection hasn't been cleanup while waiting - * commands to complete. - */ + /* Check if the connection has been cleaned up concurrently */ c = hci_conn_hash_lookup_handle(hdev, handle); if (!c || c != conn) { err = 0; @@ -5428,7 +5420,13 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason) * or in case of LE it was still scanning so it can be cleanup * safely. */ - hci_conn_failed(conn, reason); + if (disconnect) { + conn->state = BT_CLOSED; + hci_disconn_cfm(conn, reason); + hci_conn_del(conn); + } else { + hci_conn_failed(conn, reason); + } unlock: hci_dev_unlock(hdev); From c7f59461f5a78994613afc112cdd73688aef9076 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Wed, 4 Oct 2023 20:42:24 +0800 Subject: [PATCH 129/661] Bluetooth: Fix a refcnt underflow problem for hci_conn Syzbot reports a warning as follows: WARNING: CPU: 1 PID: 26946 at net/bluetooth/hci_conn.c:619 hci_conn_timeout+0x122/0x210 net/bluetooth/hci_conn.c:619 ... Call Trace: process_one_work+0x884/0x15c0 kernel/workqueue.c:2630 process_scheduled_works kernel/workqueue.c:2703 [inline] worker_thread+0x8b9/0x1290 kernel/workqueue.c:2784 kthread+0x33c/0x440 kernel/kthread.c:388 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 It is because the HCI_EV_SIMPLE_PAIR_COMPLETE event handler drops hci_conn directly without check Simple Pairing whether be enabled. But the Simple Pairing process can only be used if both sides have the support enabled in the host stack. Add hci_conn_ssp_enabled() for hci_conn in HCI_EV_IO_CAPA_REQUEST and HCI_EV_SIMPLE_PAIR_COMPLETE event handlers to fix the problem. Fixes: 0493684ed239 ("[Bluetooth] Disable disconnect timer during Simple Pairing") Signed-off-by: Ziyang Xuan Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 61b1b244595e4a..7e1c875e0e888e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5323,7 +5323,7 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (!conn) + if (!conn || !hci_conn_ssp_enabled(conn)) goto unlock; hci_conn_hold(conn); @@ -5570,7 +5570,7 @@ static void hci_simple_pair_complete_evt(struct hci_dev *hdev, void *data, hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (!conn) + if (!conn || !hci_conn_ssp_enabled(conn)) goto unlock; /* Reset the authentication requirement to unknown */ From ebd032fa881882fef2acb9da1bbde48d8233241d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 4 Oct 2023 13:12:58 +0200 Subject: [PATCH 130/661] netfilter: nf_tables: do not remove elements if set backend implements .abort pipapo set backend maintains two copies of the datastructure, removing the elements from the copy that is going to be discarded slows down the abort path significantly, from several minutes to few seconds after this patch. Fixes: 212ed75dc5fb ("netfilter: nf_tables: integrate pipapo into commit protocol") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a72b6aeefb1b5d..c3de3791cabd23 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -10347,7 +10347,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; } te = (struct nft_trans_elem *)trans->data; - nft_setelem_remove(net, te->set, &te->elem); + if (!te->set->ops->abort || + nft_setelem_is_catchall(te->set, &te->elem)) + nft_setelem_remove(net, te->set, &te->elem); + if (!nft_setelem_is_catchall(te->set, &te->elem)) atomic_dec(&te->set->nelems); From 2e1d175410972285333193837a4250a74cd472e6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 5 Oct 2023 10:53:08 +0200 Subject: [PATCH 131/661] netfilter: nfnetlink_log: silence bogus compiler warning net/netfilter/nfnetlink_log.c:800:18: warning: variable 'ctinfo' is uninitialized The warning is bogus, the variable is only used if ct is non-NULL and always initialised in that case. Init to 0 too to silence this. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309100514.ndBFebXN-lkp@intel.com/ Signed-off-by: Florian Westphal --- net/netfilter/nfnetlink_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 53c9e76473ba17..f03f4d4d7d8896 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -698,8 +698,8 @@ nfulnl_log_packet(struct net *net, unsigned int plen = 0; struct nfnl_log_net *log = nfnl_log_pernet(net); const struct nfnl_ct_hook *nfnl_ct = NULL; + enum ip_conntrack_info ctinfo = 0; struct nf_conn *ct = NULL; - enum ip_conntrack_info ctinfo; if (li_user && li_user->type == NF_LOG_TYPE_ULOG) li = li_user; From d51c42cdef5f961f63e39e172e4dfdcac54acd5e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 3 Oct 2023 16:17:51 -0700 Subject: [PATCH 132/661] netfilter: nf_tables: Annotate struct nft_pipapo_match with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct nft_pipapo_match. Cc: Pablo Neira Ayuso Cc: Jozsef Kadlecsik Cc: Florian Westphal Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: netfilter-devel@vger.kernel.org Cc: coreteam@netfilter.org Cc: netdev@vger.kernel.org Link: https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci [1] Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Signed-off-by: Florian Westphal --- net/netfilter/nft_set_pipapo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 25a75591583ebe..2e164a319945f7 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -147,7 +147,7 @@ struct nft_pipapo_match { unsigned long * __percpu *scratch; size_t bsize_max; struct rcu_head rcu; - struct nft_pipapo_field f[]; + struct nft_pipapo_field f[] __counted_by(field_count); }; /** From 4c90bba60c26db7dc7df450f748e86440149786e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 2 Oct 2023 11:57:42 +0200 Subject: [PATCH 133/661] netfilter: nf_tables: do not refresh timeout when resetting element The dump and reset command should not refresh the timeout, this command is intended to allow users to list existing stateful objects and reset them, element expiration should be refresh via transaction instead with a specific command to achieve this, otherwise this is entering combo semantics that will be hard to be undone later (eg. a user asking to retrieve counters but _not_ requiring to refresh expiration). Fixes: 079cd633219d ("netfilter: nf_tables: Introduce NFT_MSG_GETSETELEM_RESET") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c3de3791cabd23..aae6ffebb413c7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5556,7 +5556,6 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - u64 timeout = 0; nest = nla_nest_start_noflag(skb, NFTA_LIST_ELEM); if (nest == NULL) @@ -5592,15 +5591,11 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, htonl(*nft_set_ext_flags(ext)))) goto nla_put_failure; - if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) { - timeout = *nft_set_ext_timeout(ext); - if (nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, - nf_jiffies64_to_msecs(timeout), - NFTA_SET_ELEM_PAD)) - goto nla_put_failure; - } else if (set->flags & NFT_SET_TIMEOUT) { - timeout = READ_ONCE(set->timeout); - } + if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && + nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, + nf_jiffies64_to_msecs(*nft_set_ext_timeout(ext)), + NFTA_SET_ELEM_PAD)) + goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { u64 expires, now = get_jiffies_64(); @@ -5615,9 +5610,6 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, nf_jiffies64_to_msecs(expires), NFTA_SET_ELEM_PAD)) goto nla_put_failure; - - if (reset) - *nft_set_ext_expiration(ext) = now + timeout; } if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) { From 52177bbf19e6e9398375a148d2e13ed492b40b80 Mon Sep 17 00:00:00 2001 From: Xingyuan Mo Date: Mon, 9 Oct 2023 18:36:14 +0800 Subject: [PATCH 134/661] nf_tables: fix NULL pointer dereference in nft_inner_init() We should check whether the NFTA_INNER_NUM netlink attribute is present before accessing it, otherwise a null pointer deference error will occur. Call Trace: dump_stack_lvl+0x4f/0x90 print_report+0x3f0/0x620 kasan_report+0xcd/0x110 __asan_load4+0x84/0xa0 nft_inner_init+0x128/0x2e0 nf_tables_newrule+0x813/0x1230 nfnetlink_rcv_batch+0xec3/0x1170 nfnetlink_rcv+0x1e4/0x220 netlink_unicast+0x34e/0x4b0 netlink_sendmsg+0x45c/0x7e0 __sys_sendto+0x355/0x370 __x64_sys_sendto+0x84/0xa0 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Fixes: 3a07327d10a0 ("netfilter: nft_inner: support for inner tunnel header matching") Signed-off-by: Xingyuan Mo Signed-off-by: Florian Westphal --- net/netfilter/nft_inner.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nft_inner.c b/net/netfilter/nft_inner.c index 28e2873ba24e42..928312d01eb1d6 100644 --- a/net/netfilter/nft_inner.c +++ b/net/netfilter/nft_inner.c @@ -298,6 +298,7 @@ static int nft_inner_init(const struct nft_ctx *ctx, int err; if (!tb[NFTA_INNER_FLAGS] || + !tb[NFTA_INNER_NUM] || !tb[NFTA_INNER_HDRSIZE] || !tb[NFTA_INNER_TYPE] || !tb[NFTA_INNER_EXPR]) From 505ce0630ad5d31185695f8a29dde8d29f28faa7 Mon Sep 17 00:00:00 2001 From: Xingyuan Mo Date: Mon, 9 Oct 2023 18:36:15 +0800 Subject: [PATCH 135/661] nf_tables: fix NULL pointer dereference in nft_expr_inner_parse() We should check whether the NFTA_EXPR_NAME netlink attribute is present before accessing it, otherwise a null pointer deference error will occur. Call Trace: dump_stack_lvl+0x4f/0x90 print_report+0x3f0/0x620 kasan_report+0xcd/0x110 __asan_load2+0x7d/0xa0 nla_strcmp+0x2f/0x90 __nft_expr_type_get+0x41/0xb0 nft_expr_inner_parse+0xe3/0x200 nft_inner_init+0x1be/0x2e0 nf_tables_newrule+0x813/0x1230 nfnetlink_rcv_batch+0xec3/0x1170 nfnetlink_rcv+0x1e4/0x220 netlink_unicast+0x34e/0x4b0 netlink_sendmsg+0x45c/0x7e0 __sys_sendto+0x355/0x370 __x64_sys_sendto+0x84/0xa0 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x6e/0xd8 Fixes: 3a07327d10a0 ("netfilter: nft_inner: support for inner tunnel header matching") Signed-off-by: Xingyuan Mo Signed-off-by: Florian Westphal --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index aae6ffebb413c7..a623d31b6518fd 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3166,7 +3166,7 @@ int nft_expr_inner_parse(const struct nft_ctx *ctx, const struct nlattr *nla, if (err < 0) return err; - if (!tb[NFTA_EXPR_DATA]) + if (!tb[NFTA_EXPR_DATA] || !tb[NFTA_EXPR_NAME]) return -EINVAL; type = __nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]); From d351c1ea2de3e36e608fc355d8ae7d0cc80e6cd6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 8 Oct 2023 19:36:53 +0200 Subject: [PATCH 136/661] netfilter: nft_payload: fix wrong mac header matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mcast packets get looped back to the local machine. Such packets have a 0-length mac header, we should treat this like "mac header not set" and abort rule evaluation. As-is, we just copy data from the network header instead. Fixes: 96518518cc41 ("netfilter: add nftables") Reported-by: Blažej Krajňák Signed-off-by: Florian Westphal --- net/netfilter/nft_payload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 120f6d395b98b3..0a689c8e0295df 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -179,7 +179,7 @@ void nft_payload_eval(const struct nft_expr *expr, switch (priv->base) { case NFT_PAYLOAD_LL_HEADER: - if (!skb_mac_header_was_set(skb)) + if (!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) == 0) goto err; if (skb_vlan_tag_present(skb) && From cf98fe6b579e55aa71b6197e34c112b51f0c2a66 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 12 Oct 2023 11:17:29 +0200 Subject: [PATCH 137/661] riscv: dts: starfive: visionfive 2: correct spi's ss pin The ss pin of spi0 is the same as sck pin. According to the visionfive 2 documentation, it should be pin 49 instead of 48. Fixes: 74fb20c8f05d ("riscv: dts: starfive: Add spi node and pins configuration") Reviewed-by: Emil Renner Berthing Signed-off-by: Nam Cao Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi index 12ebe979235635..2c02358abd711a 100644 --- a/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7110-starfive-visionfive-2.dtsi @@ -431,7 +431,7 @@ }; ss-pins { - pinmux = ; bias-disable; From 18164f66e6c59fda15c198b371fa008431efdb22 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Sep 2023 17:19:52 -0700 Subject: [PATCH 138/661] x86/fpu: Allow caller to constrain xfeatures when copying to uabi buffer Plumb an xfeatures mask into __copy_xstate_to_uabi_buf() so that KVM can constrain which xfeatures are saved into the userspace buffer without having to modify the user_xfeatures field in KVM's guest_fpu state. KVM's ABI for KVM_GET_XSAVE{2} is that features that are not exposed to guest must not show up in the effective xstate_bv field of the buffer. Saving only the guest-supported xfeatures allows userspace to load the saved state on a different host with a fewer xfeatures, so long as the target host supports the xfeatures that are exposed to the guest. KVM currently sets user_xfeatures directly to restrict KVM_GET_XSAVE{2} to the set of guest-supported xfeatures, but doing so broke KVM's historical ABI for KVM_SET_XSAVE, which allows userspace to load any xfeatures that are supported by the *host*. Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20230928001956.924301-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/fpu/api.h | 3 ++- arch/x86/kernel/fpu/core.c | 5 +++-- arch/x86/kernel/fpu/xstate.c | 7 +++++-- arch/x86/kernel/fpu/xstate.h | 3 ++- arch/x86/kvm/x86.c | 21 +++++++++------------ 5 files changed, 21 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 31089b851c4f2a..a2be3aefff9f89 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -157,7 +157,8 @@ static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { static inline void fpu_sync_guest_vmexit_xfd_state(void) { } #endif -extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru); +extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, + unsigned int size, u64 xfeatures, u32 pkru); extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru); static inline void fpstate_set_confidential(struct fpu_guest *gfpu) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index a86d37052a6481..a21a4d0ecc345b 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -369,14 +369,15 @@ int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest) EXPORT_SYMBOL_GPL(fpu_swap_kvm_fpstate); void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, - unsigned int size, u32 pkru) + unsigned int size, u64 xfeatures, u32 pkru) { struct fpstate *kstate = gfpu->fpstate; union fpregs_state *ustate = buf; struct membuf mb = { .p = buf, .left = size }; if (cpu_feature_enabled(X86_FEATURE_XSAVE)) { - __copy_xstate_to_uabi_buf(mb, kstate, pkru, XSTATE_COPY_XSAVE); + __copy_xstate_to_uabi_buf(mb, kstate, xfeatures, pkru, + XSTATE_COPY_XSAVE); } else { memcpy(&ustate->fxsave, &kstate->regs.fxsave, sizeof(ustate->fxsave)); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index cadf68737e6bc7..76408313ed7fd6 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1049,6 +1049,7 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer * @to: membuf descriptor * @fpstate: The fpstate buffer from which to copy + * @xfeatures: The mask of xfeatures to save (XSAVE mode only) * @pkru_val: The PKRU value to store in the PKRU component * @copy_mode: The requested copy mode * @@ -1059,7 +1060,8 @@ static void copy_feature(bool from_xstate, struct membuf *to, void *xstate, * It supports partial copy but @to.pos always starts from zero. */ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, - u32 pkru_val, enum xstate_copy_mode copy_mode) + u64 xfeatures, u32 pkru_val, + enum xstate_copy_mode copy_mode) { const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr); struct xregs_state *xinit = &init_fpstate.regs.xsave; @@ -1083,7 +1085,7 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, break; case XSTATE_COPY_XSAVE: - header.xfeatures &= fpstate->user_xfeatures; + header.xfeatures &= fpstate->user_xfeatures & xfeatures; break; } @@ -1185,6 +1187,7 @@ void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, enum xstate_copy_mode copy_mode) { __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate, + tsk->thread.fpu.fpstate->user_xfeatures, tsk->thread.pkru, copy_mode); } diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index a4ecb04d8d6467..3518fb26d06b02 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -43,7 +43,8 @@ enum xstate_copy_mode { struct membuf; extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, - u32 pkru_val, enum xstate_copy_mode copy_mode); + u64 xfeatures, u32 pkru_val, + enum xstate_copy_mode copy_mode); extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, enum xstate_copy_mode mode); extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9f18b06bbda66b..41d8e6c8570c53 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5382,26 +5382,23 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, return 0; } -static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, - struct kvm_xsave *guest_xsave) + +static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, + u8 *state, unsigned int size) { if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) return; - fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, - guest_xsave->region, - sizeof(guest_xsave->region), + fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size, + vcpu->arch.guest_fpu.fpstate->user_xfeatures, vcpu->arch.pkru); } -static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, - u8 *state, unsigned int size) +static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, + struct kvm_xsave *guest_xsave) { - if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) - return; - - fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, - state, size, vcpu->arch.pkru); + return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region, + sizeof(guest_xsave->region)); } static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, From 8647c52e9504c99752a39f1d44f6268f82c40a5c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Sep 2023 17:19:53 -0700 Subject: [PATCH 139/661] KVM: x86: Constrain guest-supported xfeatures only at KVM_GET_XSAVE{2} Mask off xfeatures that aren't exposed to the guest only when saving guest state via KVM_GET_XSAVE{2} instead of modifying user_xfeatures directly. Preserving the maximal set of xfeatures in user_xfeatures restores KVM's ABI for KVM_SET_XSAVE, which prior to commit ad856280ddea ("x86/kvm/fpu: Limit guest user_xfeatures to supported bits of XCR0") allowed userspace to load xfeatures that are supported by the host, irrespective of what xfeatures are exposed to the guest. There is no known use case where userspace *intentionally* loads xfeatures that aren't exposed to the guest, but the bug fixed by commit ad856280ddea was specifically that KVM_GET_SAVE{2} would save xfeatures that weren't exposed to the guest, e.g. would lead to userspace unintentionally loading guest-unsupported xfeatures when live migrating a VM. Restricting KVM_SET_XSAVE to guest-supported xfeatures is especially problematic for QEMU-based setups, as QEMU has a bug where instead of terminating the VM if KVM_SET_XSAVE fails, QEMU instead simply stops loading guest state, i.e. resumes the guest after live migration with incomplete guest state, and ultimately results in guest data corruption. Note, letting userspace restore all host-supported xfeatures does not fix setups where a VM is migrated from a host *without* commit ad856280ddea, to a target with a subset of host-supported xfeatures. However there is no way to safely address that scenario, e.g. KVM could silently drop the unsupported features, but that would be a clear violation of KVM's ABI and so would require userspace to opt-in, at which point userspace could simply be updated to sanitize the to-be-loaded XSAVE state. Reported-by: Tyler Stachecki Closes: https://lore.kernel.org/all/20230914010003.358162-1-tstachecki@bloomberg.net Fixes: ad856280ddea ("x86/kvm/fpu: Limit guest user_xfeatures to supported bits of XCR0") Cc: stable@vger.kernel.org Cc: Leonardo Bras Signed-off-by: Sean Christopherson Acked-by: Dave Hansen Message-Id: <20230928001956.924301-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kernel/fpu/xstate.c | 5 +---- arch/x86/kvm/cpuid.c | 8 -------- arch/x86/kvm/x86.c | 18 ++++++++++++++++-- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 76408313ed7fd6..ef6906107c541d 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1539,10 +1539,7 @@ static int fpstate_realloc(u64 xfeatures, unsigned int ksize, fpregs_restore_userregs(); newfps->xfeatures = curfps->xfeatures | xfeatures; - - if (!guest_fpu) - newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; - + newfps->user_xfeatures = curfps->user_xfeatures | xfeatures; newfps->xfd = curfps->xfd & ~xfeatures; /* Do the final updates within the locked region */ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0544e30b4946d1..773132c3bf5af7 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -360,14 +360,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vcpu->arch.guest_supported_xcr0 = cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent); - /* - * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if - * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't - * supported by the host. - */ - vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0 | - XFEATURE_MASK_FPSSE; - kvm_update_pv_runtime(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 41d8e6c8570c53..1e645f5b1e2cf1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5386,12 +5386,26 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, u8 *state, unsigned int size) { + /* + * Only copy state for features that are enabled for the guest. The + * state itself isn't problematic, but setting bits in the header for + * features that are supported in *this* host but not exposed to the + * guest can result in KVM_SET_XSAVE failing when live migrating to a + * compatible host without the features that are NOT exposed to the + * guest. + * + * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if + * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't + * supported by the host. + */ + u64 supported_xcr0 = vcpu->arch.guest_supported_xcr0 | + XFEATURE_MASK_FPSSE; + if (fpstate_is_confidential(&vcpu->arch.guest_fpu)) return; fpu_copy_guest_fpstate_to_uabi(&vcpu->arch.guest_fpu, state, size, - vcpu->arch.guest_fpu.fpstate->user_xfeatures, - vcpu->arch.pkru); + supported_xcr0, vcpu->arch.pkru); } static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, From 60d351f18f7a8acd08964ef1d5c948354a7907be Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Sep 2023 17:19:54 -0700 Subject: [PATCH 140/661] KVM: selftests: Touch relevant XSAVE state in guest for state test Modify support XSAVE state in the "state test's" guest code so that saving and loading state via KVM_{G,S}ET_XSAVE actually does something useful, i.e. so that xstate_bv in XSAVE state isn't empty. Punt on BNDCSR for now, it's easier to just stuff that xfeature from the host side. Signed-off-by: Sean Christopherson Message-Id: <20230928001956.924301-4-seanjc@google.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/include/x86_64/processor.h | 14 ++++ .../testing/selftests/kvm/x86_64/state_test.c | 77 +++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 4fd04211252672..6f66861175adbe 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -68,6 +68,12 @@ struct xstate { #define XFEATURE_MASK_OPMASK BIT_ULL(5) #define XFEATURE_MASK_ZMM_Hi256 BIT_ULL(6) #define XFEATURE_MASK_Hi16_ZMM BIT_ULL(7) +#define XFEATURE_MASK_PT BIT_ULL(8) +#define XFEATURE_MASK_PKRU BIT_ULL(9) +#define XFEATURE_MASK_PASID BIT_ULL(10) +#define XFEATURE_MASK_CET_USER BIT_ULL(11) +#define XFEATURE_MASK_CET_KERNEL BIT_ULL(12) +#define XFEATURE_MASK_LBR BIT_ULL(15) #define XFEATURE_MASK_XTILE_CFG BIT_ULL(17) #define XFEATURE_MASK_XTILE_DATA BIT_ULL(18) @@ -147,6 +153,7 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_CLWB KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24) #define X86_FEATURE_UMIP KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2) #define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3) +#define X86_FEATURE_OSPKE KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4) #define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16) #define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22) #define X86_FEATURE_SGX_LC KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30) @@ -553,6 +560,13 @@ static inline void xsetbv(u32 index, u64 value) __asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index)); } +static inline void wrpkru(u32 pkru) +{ + /* Note, ECX and EDX are architecturally required to be '0'. */ + asm volatile(".byte 0x0f,0x01,0xef\n\t" + : : "a" (pkru), "c"(0), "d"(0)); +} + static inline struct desc_ptr get_gdt(void) { struct desc_ptr gdt; diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 4c4925a8ab4523..df3e93df4343fa 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -139,6 +139,83 @@ static void vmx_l1_guest_code(struct vmx_pages *vmx_pages) static void __attribute__((__flatten__)) guest_code(void *arg) { GUEST_SYNC(1); + + if (this_cpu_has(X86_FEATURE_XSAVE)) { + uint64_t supported_xcr0 = this_cpu_supported_xcr0(); + uint8_t buffer[4096]; + + memset(buffer, 0xcc, sizeof(buffer)); + + set_cr4(get_cr4() | X86_CR4_OSXSAVE); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); + + xsetbv(0, xgetbv(0) | supported_xcr0); + + /* + * Modify state for all supported xfeatures to take them out of + * their "init" state, i.e. to make them show up in XSTATE_BV. + * + * Note off-by-default features, e.g. AMX, are out of scope for + * this particular testcase as they have a different ABI. + */ + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP); + asm volatile ("fincstp"); + + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE); + asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer)); + + if (supported_xcr0 & XFEATURE_MASK_YMM) + asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer)); + + if (supported_xcr0 & XFEATURE_MASK_AVX512) { + asm volatile ("kmovq %0, %%k1" :: "r" (-1ull)); + asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer)); + asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer)); + } + + if (this_cpu_has(X86_FEATURE_MPX)) { + uint64_t bounds[2] = { 10, 0xffffffffull }; + uint64_t output[2] = { }; + + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS); + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR); + + /* + * Don't bother trying to get BNDCSR into the INUSE + * state. MSR_IA32_BNDCFGS doesn't count as it isn't + * managed via XSAVE/XRSTOR, and BNDCFGU can only be + * modified by XRSTOR. Stuffing XSTATE_BV in the host + * is simpler than doing XRSTOR here in the guest. + * + * However, temporarily enable MPX in BNDCFGS so that + * BNDMOV actually loads BND1. If MPX isn't *fully* + * enabled, all MPX instructions are treated as NOPs. + * + * Hand encode "bndmov (%rax),%bnd1" as support for MPX + * mnemonics/registers has been removed from gcc and + * clang (and was never fully supported by clang). + */ + wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0)); + asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds)); + /* + * Hand encode "bndmov %bnd1, (%rax)" to sanity check + * that BND1 actually got loaded. + */ + asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output)); + wrmsr(MSR_IA32_BNDCFGS, 0); + + GUEST_ASSERT_EQ(bounds[0], output[0]); + GUEST_ASSERT_EQ(bounds[1], output[1]); + } + if (this_cpu_has(X86_FEATURE_PKU)) { + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU); + set_cr4(get_cr4() | X86_CR4_PKE); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE)); + + wrpkru(-1u); + } + } + GUEST_SYNC(2); if (arg) { From 77709820787355f45755fe454e26fca8584ecf40 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Sep 2023 17:19:55 -0700 Subject: [PATCH 141/661] KVM: selftests: Load XSAVE state into untouched vCPU during state test Expand x86's state test to load XSAVE state into a "dummy" vCPU prior to KVM_SET_CPUID2, and again with an empty guest CPUID model. Except for off-by-default features, i.e. AMX, KVM's ABI for KVM_SET_XSAVE is that userspace is allowed to load xfeatures so long as they are supported by the host. This is a regression test for a combination of KVM bugs where the state saved by KVM_GET_XSAVE{2} could not be loaded via KVM_SET_XSAVE if the saved xstate_bv would load guest-unsupported xfeatures. Signed-off-by: Sean Christopherson Message-Id: <20230928001956.924301-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- .../testing/selftests/kvm/x86_64/state_test.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index df3e93df4343fa..115b2cdf927946 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -231,9 +231,9 @@ static void __attribute__((__flatten__)) guest_code(void *arg) int main(int argc, char *argv[]) { vm_vaddr_t nested_gva = 0; - + struct kvm_cpuid2 empty_cpuid = {}; struct kvm_regs regs1, regs2; - struct kvm_vcpu *vcpu; + struct kvm_vcpu *vcpu, *vcpuN; struct kvm_vm *vm; struct kvm_x86_state *state; struct ucall uc; @@ -286,6 +286,21 @@ int main(int argc, char *argv[]) /* Restore state in a new VM. */ vcpu = vm_recreate_with_one_vcpu(vm); vcpu_load_state(vcpu, state); + + /* + * Restore XSAVE state in a dummy vCPU, first without doing + * KVM_SET_CPUID2, and then with an empty guest CPUID. Except + * for off-by-default xfeatures, e.g. AMX, KVM is supposed to + * allow KVM_SET_XSAVE regardless of guest CPUID. Manually + * load only XSAVE state, MSRs in particular have a much more + * convoluted ABI. + */ + vcpuN = __vm_vcpu_add(vm, vcpu->id + 1); + vcpu_xsave_set(vcpuN, state->xsave); + + vcpu_init_cpuid(vcpuN, &empty_cpuid); + vcpu_xsave_set(vcpuN, state->xsave); + kvm_x86_state_cleanup(state); memset(®s2, 0, sizeof(regs2)); From 87e3ca055cdc6c63fb82b9bec7c370405d03f6ef Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Sep 2023 17:19:56 -0700 Subject: [PATCH 142/661] KVM: selftests: Force load all supported XSAVE state in state test Extend x86's state to forcefully load *all* host-supported xfeatures by modifying xstate_bv in the saved state. Stuffing xstate_bv ensures that the selftest is verifying KVM's full ABI regardless of whether or not the guest code is successful in getting various xfeatures out of their INIT state, e.g. see the disaster that is/was MPX. Signed-off-by: Sean Christopherson Message-Id: <20230928001956.924301-6-seanjc@google.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/include/x86_64/processor.h | 9 +++++++++ tools/testing/selftests/kvm/x86_64/state_test.c | 14 ++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 6f66861175adbe..25bc61dac5fbe6 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -922,6 +922,15 @@ static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature) !kvm_cpu_has(feature.anti_feature); } +static __always_inline uint64_t kvm_cpu_supported_xcr0(void) +{ + if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO)) + return 0; + + return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) | + ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32); +} + static inline size_t kvm_cpuid2_size(int nr_entries) { return sizeof(struct kvm_cpuid2) + diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 115b2cdf927946..88b58aab720773 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -230,6 +230,7 @@ static void __attribute__((__flatten__)) guest_code(void *arg) int main(int argc, char *argv[]) { + uint64_t *xstate_bv, saved_xstate_bv; vm_vaddr_t nested_gva = 0; struct kvm_cpuid2 empty_cpuid = {}; struct kvm_regs regs1, regs2; @@ -294,12 +295,25 @@ int main(int argc, char *argv[]) * allow KVM_SET_XSAVE regardless of guest CPUID. Manually * load only XSAVE state, MSRs in particular have a much more * convoluted ABI. + * + * Load two versions of XSAVE state: one with the actual guest + * XSAVE state, and one with all supported features forced "on" + * in xstate_bv, e.g. to ensure that KVM allows loading all + * supported features, even if something goes awry in saving + * the original snapshot. */ + xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512]; + saved_xstate_bv = *xstate_bv; + vcpuN = __vm_vcpu_add(vm, vcpu->id + 1); vcpu_xsave_set(vcpuN, state->xsave); + *xstate_bv = kvm_cpu_supported_xcr0(); + vcpu_xsave_set(vcpuN, state->xsave); vcpu_init_cpuid(vcpuN, &empty_cpuid); vcpu_xsave_set(vcpuN, state->xsave); + *xstate_bv = saved_xstate_bv; + vcpu_xsave_set(vcpuN, state->xsave); kvm_x86_state_cleanup(state); From b65235f6e102354ccafda601eaa1c5bef5284d21 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 28 Sep 2023 20:33:51 +0300 Subject: [PATCH 143/661] x86: KVM: SVM: always update the x2avic msr interception The following problem exists since x2avic was enabled in the KVM: svm_set_x2apic_msr_interception is called to enable the interception of the x2apic msrs. In particular it is called at the moment the guest resets its apic. Assuming that the guest's apic was in x2apic mode, the reset will bring it back to the xapic mode. The svm_set_x2apic_msr_interception however has an erroneous check for '!apic_x2apic_mode()' which prevents it from doing anything in this case. As a result of this, all x2apic msrs are left unintercepted, and that exposes the bare metal x2apic (if enabled) to the guest. Oops. Remove the erroneous '!apic_x2apic_mode()' check to fix that. This fixes CVE-2023-5090 Fixes: 4d1d7942e36a ("KVM: SVM: Introduce logic to (de)activate x2AVIC mode") Cc: stable@vger.kernel.org Signed-off-by: Maxim Levitsky Reviewed-by: Suravee Suthikulpanit Tested-by: Suravee Suthikulpanit Reviewed-by: Sean Christopherson Message-Id: <20230928173354.217464-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9507df93f410a6..acdd0b89e4715a 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -913,8 +913,7 @@ void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool intercept) if (intercept == svm->x2avic_msrs_intercepted) return; - if (!x2avic_enabled || - !apic_x2apic_mode(svm->vcpu.arch.apic)) + if (!x2avic_enabled) return; for (i = 0; i < MAX_DIRECT_ACCESS_MSRS; i++) { From 2dcf37abf9d3aab7f975002d29fc7c17272def38 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 28 Sep 2023 20:33:52 +0300 Subject: [PATCH 144/661] x86: KVM: SVM: add support for Invalid IPI Vector interception In later revisions of AMD's APM, there is a new 'incomplete IPI' exit code: "Invalid IPI Vector - The vector for the specified IPI was set to an illegal value (VEC < 16)" Note that tests on Zen2 machine show that this VM exit doesn't happen and instead AVIC just does nothing. Add support for this exit code by doing nothing, instead of filling the kernel log with errors. Also replace an unthrottled 'pr_err()' if another unknown incomplete IPI exit happens with vcpu_unimpl() (e.g in case AMD adds yet another 'Invalid IPI' exit reason) Cc: Signed-off-by: Maxim Levitsky Reviewed-by: Sean Christopherson Message-Id: <20230928173354.217464-3-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/svm.h | 1 + arch/x86/kvm/svm/avic.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 19bf955b67e0da..3ac0ffc4f3e202 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -268,6 +268,7 @@ enum avic_ipi_failure_cause { AVIC_IPI_FAILURE_TARGET_NOT_RUNNING, AVIC_IPI_FAILURE_INVALID_TARGET, AVIC_IPI_FAILURE_INVALID_BACKING_PAGE, + AVIC_IPI_FAILURE_INVALID_IPI_VECTOR, }; #define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 2092db892d7d05..4b74ea91f4e6bb 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -529,8 +529,11 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu) case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE: WARN_ONCE(1, "Invalid backing page\n"); break; + case AVIC_IPI_FAILURE_INVALID_IPI_VECTOR: + /* Invalid IPI with vector < 16 */ + break; default: - pr_err("Unknown IPI interception\n"); + vcpu_unimpl(vcpu, "Unknown avic incomplete IPI interception\n"); } return 1; From 3fdc6087df3be73a212a81ce5dd6516638568806 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 28 Sep 2023 20:33:53 +0300 Subject: [PATCH 145/661] x86: KVM: SVM: refresh AVIC inhibition in svm_leave_nested() svm_leave_nested() similar to a nested VM exit, get the vCPU out of nested mode and thus should end the local inhibition of AVIC on this vCPU. Failure to do so, can lead to hangs on guest reboot. Raise the KVM_REQ_APICV_UPDATE request to refresh the AVIC state of the current vCPU in this case. Fixes: f44509f849fe ("KVM: x86: SVM: allow AVIC to co-exist with a nested guest running") Cc: stable@vger.kernel.org Signed-off-by: Maxim Levitsky Reviewed-by: Sean Christopherson Message-Id: <20230928173354.217464-4-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index dd496c9e5f91f2..3fea8c47679e68 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1253,6 +1253,9 @@ void svm_leave_nested(struct kvm_vcpu *vcpu) nested_svm_uninit_mmu_context(vcpu); vmcb_mark_all_dirty(svm->vmcb); + + if (kvm_apicv_activated(vcpu->kvm)) + kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu); } kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); From 3e9346734661cc20bd77aeb43dbf4e5f46a2c16e Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 2 Oct 2023 13:28:54 -0500 Subject: [PATCH 146/661] KVM: SVM: Fix build error when using -Werror=unused-but-set-variable Commit 916e3e5f26ab ("KVM: SVM: Do not use user return MSR support for virtualized TSC_AUX") introduced a local variable used for the rdmsr() function for the high 32-bits of the MSR value. This variable is not used after being set and triggers a warning or error, when treating warnings as errors, when the unused-but-set-variable flag is set. Mark this variable as __maybe_unused to fix this. Fixes: 916e3e5f26ab ("KVM: SVM: Do not use user return MSR support for virtualized TSC_AUX") Signed-off-by: Tom Lendacky Reviewed-by: Sean Christopherson Message-Id: <0da9874b6e9fcbaaa5edeb345d7e2a7c859fc818.1696271334.git.thomas.lendacky@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index acdd0b89e4715a..beea99c8e8e05e 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -691,7 +691,7 @@ static int svm_hardware_enable(void) */ if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) { struct sev_es_save_area *hostsa; - u32 msr_hi; + u32 __maybe_unused msr_hi; hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400); From 60197a4631b907b30343e25af702257d92f359cf Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 12 Oct 2023 12:16:17 +0530 Subject: [PATCH 147/661] KVM: arm64: pmu: Drop redundant check for non-NULL kvm_pmu_events There is an allocated and valid struct kvm_pmu_events for each cpu on the system via DEFINE_PER_CPU(). Hence there cannot be a NULL pointer accessed via this_cpu_ptr() in the helper kvm_get_pmu_events(). Hence non-NULL check for pmu in such places are redundant and can be dropped. Cc: Marc Zyngier Cc: Oliver Upton Cc: James Morse Cc: Suzuki K Poulose Cc: kvmarm@lists.linux.dev Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231012064617.897346-1-anshuman.khandual@arm.com --- arch/arm64/kvm/pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index 0eea225fd09a7a..a243934c5568bb 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -39,7 +39,7 @@ void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) { struct kvm_pmu_events *pmu = kvm_get_pmu_events(); - if (!kvm_arm_support_pmu_v3() || !pmu || !kvm_pmu_switch_needed(attr)) + if (!kvm_arm_support_pmu_v3() || !kvm_pmu_switch_needed(attr)) return; if (!attr->exclude_host) @@ -55,7 +55,7 @@ void kvm_clr_pmu_events(u32 clr) { struct kvm_pmu_events *pmu = kvm_get_pmu_events(); - if (!kvm_arm_support_pmu_v3() || !pmu) + if (!kvm_arm_support_pmu_v3()) return; pmu->events_host &= ~clr; From e2145c99b53ec88105c69bbbb577265660d08c69 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 12 Oct 2023 11:25:40 -0400 Subject: [PATCH 148/661] KVM: MIPS: fix -Wunused-but-set-variable warning The variable is completely unused, remove it. Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 7b2ac1319d70ac..467ee6b95ae1c6 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -592,7 +592,7 @@ static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, gfn_t gfn = gpa >> PAGE_SHIFT; int srcu_idx, err; kvm_pfn_t pfn; - pte_t *ptep, entry, old_pte; + pte_t *ptep, entry; bool writeable; unsigned long prot_bits; unsigned long mmu_seq; @@ -664,7 +664,6 @@ static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, entry = pfn_pte(pfn, __pgprot(prot_bits)); /* Write the PTE */ - old_pte = *ptep; set_pte(ptep, entry); err = 0; From 0fd76865006dfdc3cdc6dade538ca2257a847364 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 12 Oct 2023 13:34:58 +0100 Subject: [PATCH 149/661] KVM: arm64: Add nPIR{E0}_EL1 to HFG traps nPIR_EL1 and nPIREO_EL1 are part of the 'reverse polarity' set of bits, set them so that we disable the traps for a guest. Unfortunately, these bits are not yet described in the ARM ARM, but only live in the XML description. Also add them to the NV FGT forwarding infrastructure. Signed-off-by: Joey Gouly Fixes: e930694e6145 ("KVM: arm64: Restructure FGT register switching") Cc: Oliver Upton [maz: add entries to the NV FGT array, commit message update] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231012123459.2820835-2-joey.gouly@arm.com --- arch/arm64/include/asm/kvm_arm.h | 4 ++-- arch/arm64/kvm/emulate-nested.c | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 5882b241559641..1095c6647e9665 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -344,14 +344,14 @@ */ #define __HFGRTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51)) #define __HFGRTR_EL2_MASK GENMASK(49, 0) -#define __HFGRTR_EL2_nMASK (GENMASK(55, 54) | BIT(50)) +#define __HFGRTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50)) #define __HFGWTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51) | \ BIT(46) | BIT(42) | BIT(40) | BIT(28) | \ GENMASK(26, 25) | BIT(21) | BIT(18) | \ GENMASK(15, 14) | GENMASK(10, 9) | BIT(2)) #define __HFGWTR_EL2_MASK GENMASK(49, 0) -#define __HFGWTR_EL2_nMASK (GENMASK(55, 54) | BIT(50)) +#define __HFGWTR_EL2_nMASK (GENMASK(58, 57) | GENMASK(55, 54) | BIT(50)) #define __HFGITR_EL2_RES0 GENMASK(63, 57) #define __HFGITR_EL2_MASK GENMASK(54, 0) diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 9ced1bf0c2b782..ee902ff2a50f80 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -977,6 +977,8 @@ enum fg_filter_id { static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = { /* HFGRTR_EL2, HFGWTR_EL2 */ + SR_FGT(SYS_PIR_EL1, HFGxTR, nPIR_EL1, 0), + SR_FGT(SYS_PIRE0_EL1, HFGxTR, nPIRE0_EL1, 0), SR_FGT(SYS_TPIDR2_EL0, HFGxTR, nTPIDR2_EL0, 0), SR_FGT(SYS_SMPRI_EL1, HFGxTR, nSMPRI_EL1, 0), SR_FGT(SYS_ACCDATA_EL1, HFGxTR, nACCDATA_EL1, 0), From 839d90357b7ce6b129045d28ac22bd3a247e2350 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 12 Oct 2023 13:34:59 +0100 Subject: [PATCH 150/661] KVM: arm64: POR{E0}_EL1 do not need trap handlers These will not be trapped by KVM, so don't need a handler. Signed-off-by: Joey Gouly Cc: Marc Zyngier Cc: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20231012123459.2820835-3-joey.gouly@arm.com --- arch/arm64/kvm/sys_regs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index e92ec810d4494b..0afd6136e2759c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2122,8 +2122,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi }, { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, - { SYS_DESC(SYS_PIRE0_EL1), access_vm_reg, reset_unknown, PIRE0_EL1 }, - { SYS_DESC(SYS_PIR_EL1), access_vm_reg, reset_unknown, PIR_EL1 }, + { SYS_DESC(SYS_PIRE0_EL1), NULL, reset_unknown, PIRE0_EL1 }, + { SYS_DESC(SYS_PIR_EL1), NULL, reset_unknown, PIR_EL1 }, { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, { SYS_DESC(SYS_LORSA_EL1), trap_loregion }, From 9404673293b065cbb16b8915530147cac7e80b4d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 22 Aug 2023 13:18:10 +0100 Subject: [PATCH 151/661] KVM: arm64: timers: Correctly handle TGE flip with CNTPOFF_EL2 Contrary to common belief, HCR_EL2.TGE has a direct and immediate effect on the way the EL0 physical counter is offset. Flipping TGE from 1 to 0 while at EL2 immediately changes the way the counter compared to the CVAL limit. This means that we cannot directly save/restore the guest's view of CVAL, but that we instead must treat it as if CNTPOFF didn't exist. Only in the world switch, once we figure out that we do have CNTPOFF, can we must the offset back and forth depending on the polarity of TGE. Fixes: 2b4825a86940 ("KVM: arm64: timers: Use CNTPOFF_EL2 to offset the physical timer") Reported-by: Ganapatrao Kulkarni Tested-by: Ganapatrao Kulkarni Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 13 +++------- arch/arm64/kvm/hyp/vhe/switch.c | 44 +++++++++++++++++++++++++++++++++ include/kvm/arm_arch_timer.h | 7 ++++++ 3 files changed, 54 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 6dcdae4d38cb5a..a1e24228aaaa76 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -55,11 +55,6 @@ static struct irq_ops arch_timer_irq_ops = { .get_input_level = kvm_arch_timer_get_input_level, }; -static bool has_cntpoff(void) -{ - return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)); -} - static int nr_timers(struct kvm_vcpu *vcpu) { if (!vcpu_has_nv(vcpu)) @@ -180,7 +175,7 @@ u64 kvm_phys_timer_read(void) return timecounter->cc->read(timecounter->cc); } -static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) +void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) { if (vcpu_has_nv(vcpu)) { if (is_hyp_ctxt(vcpu)) { @@ -548,8 +543,7 @@ static void timer_save_state(struct arch_timer_context *ctx) timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL)); cval = read_sysreg_el0(SYS_CNTP_CVAL); - if (!has_cntpoff()) - cval -= timer_get_offset(ctx); + cval -= timer_get_offset(ctx); timer_set_cval(ctx, cval); @@ -636,8 +630,7 @@ static void timer_restore_state(struct arch_timer_context *ctx) cval = timer_get_cval(ctx); offset = timer_get_offset(ctx); set_cntpoff(offset); - if (!has_cntpoff()) - cval += offset; + cval += offset; write_sysreg_el0(cval, SYS_CNTP_CVAL); isb(); write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 6537f58b1a8cc0..448b17080d3617 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -39,6 +39,26 @@ static void __activate_traps(struct kvm_vcpu *vcpu) ___activate_traps(vcpu); + if (has_cntpoff()) { + struct timer_map map; + + get_timer_map(vcpu, &map); + + /* + * We're entrering the guest. Reload the correct + * values from memory now that TGE is clear. + */ + if (map.direct_ptimer == vcpu_ptimer(vcpu)) + val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0); + if (map.direct_ptimer == vcpu_hptimer(vcpu)) + val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2); + + if (map.direct_ptimer) { + write_sysreg_el0(val, SYS_CNTP_CVAL); + isb(); + } + } + val = read_sysreg(cpacr_el1); val |= CPACR_ELx_TTA; val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN | @@ -77,6 +97,30 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); + if (has_cntpoff()) { + struct timer_map map; + u64 val, offset; + + get_timer_map(vcpu, &map); + + /* + * We're exiting the guest. Save the latest CVAL value + * to memory and apply the offset now that TGE is set. + */ + val = read_sysreg_el0(SYS_CNTP_CVAL); + if (map.direct_ptimer == vcpu_ptimer(vcpu)) + __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val; + if (map.direct_ptimer == vcpu_hptimer(vcpu)) + __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val; + + offset = read_sysreg_s(SYS_CNTPOFF_EL2); + + if (map.direct_ptimer && offset) { + write_sysreg_el0(val + offset, SYS_CNTP_CVAL); + isb(); + } + } + /* * ARM errata 1165522 and 1530923 require the actual execution of the * above before we can switch to the EL2/EL0 translation regime used by diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index bb3cb005873ec9..e748bc957d8326 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -82,6 +82,8 @@ struct timer_map { struct arch_timer_context *emul_ptimer; }; +void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map); + struct arch_timer_cpu { struct arch_timer_context timers[NR_KVM_TIMERS]; @@ -145,4 +147,9 @@ u64 timer_get_cval(struct arch_timer_context *ctxt); void kvm_timer_cpu_up(void); void kvm_timer_cpu_down(void); +static inline bool has_cntpoff(void) +{ + return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF)); +} + #endif From 7b821db95140e2c118567aee22a78bf85f3617e0 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 2 Oct 2023 16:54:06 -0700 Subject: [PATCH 152/661] drm/bridge: ti-sn65dsi86: Associate DSI device lifetime with auxiliary device The kernel produces a warning splat and the DSI device fails to register in this driver if the i2c driver probes, populates child auxiliary devices, and then somewhere in ti_sn_bridge_probe() a function call returns -EPROBE_DEFER. When the auxiliary driver probe defers, the dsi device created by devm_mipi_dsi_device_register_full() is left registered because the devm managed device used to manage the lifetime of the DSI device is the parent i2c device, not the auxiliary device that is being probed. Associate the DSI device created and managed by this driver to the lifetime of the auxiliary device, not the i2c device, so that the DSI device is removed when the auxiliary driver unbinds. Similarly change the device pointer used for dev_err_probe() so the deferred probe errors are associated with the auxiliary device instead of the parent i2c device so we can narrow down future problems faster. Cc: Douglas Anderson Cc: Maxime Ripard Fixes: c3b75d4734cb ("drm/bridge: sn65dsi86: Register and attach our DSI device at probe") Signed-off-by: Stephen Boyd Reviewed-by: Neil Armstrong Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20231002235407.769399-1-swboyd@chromium.org --- drivers/gpu/drm/bridge/ti-sn65dsi86.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index f448b903e19075..84148a79414b7f 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -692,7 +692,7 @@ static struct ti_sn65dsi86 *bridge_to_ti_sn65dsi86(struct drm_bridge *bridge) return container_of(bridge, struct ti_sn65dsi86, bridge); } -static int ti_sn_attach_host(struct ti_sn65dsi86 *pdata) +static int ti_sn_attach_host(struct auxiliary_device *adev, struct ti_sn65dsi86 *pdata) { int val; struct mipi_dsi_host *host; @@ -707,7 +707,7 @@ static int ti_sn_attach_host(struct ti_sn65dsi86 *pdata) if (!host) return -EPROBE_DEFER; - dsi = devm_mipi_dsi_device_register_full(dev, host, &info); + dsi = devm_mipi_dsi_device_register_full(&adev->dev, host, &info); if (IS_ERR(dsi)) return PTR_ERR(dsi); @@ -725,7 +725,7 @@ static int ti_sn_attach_host(struct ti_sn65dsi86 *pdata) pdata->dsi = dsi; - return devm_mipi_dsi_attach(dev, dsi); + return devm_mipi_dsi_attach(&adev->dev, dsi); } static int ti_sn_bridge_attach(struct drm_bridge *bridge, @@ -1298,9 +1298,9 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, struct device_node *np = pdata->dev->of_node; int ret; - pdata->next_bridge = devm_drm_of_get_bridge(pdata->dev, np, 1, 0); + pdata->next_bridge = devm_drm_of_get_bridge(&adev->dev, np, 1, 0); if (IS_ERR(pdata->next_bridge)) - return dev_err_probe(pdata->dev, PTR_ERR(pdata->next_bridge), + return dev_err_probe(&adev->dev, PTR_ERR(pdata->next_bridge), "failed to create panel bridge\n"); ti_sn_bridge_parse_lanes(pdata, np); @@ -1319,9 +1319,9 @@ static int ti_sn_bridge_probe(struct auxiliary_device *adev, drm_bridge_add(&pdata->bridge); - ret = ti_sn_attach_host(pdata); + ret = ti_sn_attach_host(adev, pdata); if (ret) { - dev_err_probe(pdata->dev, ret, "failed to attach dsi host\n"); + dev_err_probe(&adev->dev, ret, "failed to attach dsi host\n"); goto err_remove_bridge; } From ad3e33fe071dffea07279f96dab4f3773c430fe2 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 25 Sep 2023 15:00:11 -0700 Subject: [PATCH 153/661] drm/panel: Move AUX B116XW03 out of panel-edp back to panel-simple In commit 5f04e7ce392d ("drm/panel-edp: Split eDP panels out of panel-simple") I moved a pile of panels out of panel-simple driver into the newly created panel-edp driver. One of those panels, however, shouldn't have been moved. As is clear from commit e35e305eff0f ("drm/panel: simple: Add AUO B116XW03 panel support"), AUX B116XW03 is an LVDS panel. It's used in exynos5250-snow and exynos5420-peach-pit where it's clear that the panel is hooked up with LVDS. Furthermore, searching for datasheets I found one that makes it clear that this panel is LVDS. As far as I can tell, I got confused because in commit 88d3457ceb82 ("drm/panel: auo,b116xw03: fix flash backlight when power on") Jitao Shi added "DRM_MODE_CONNECTOR_eDP". That seems wrong. Looking at the downstream ChromeOS trees, it seems like some Mediatek boards are using a panel that they call "auo,b116xw03" that's an eDP panel. The best I can guess is that they actually have a different panel that has similar timing. If so then the proper panel should be used or they should switch to the generic "edp-panel" compatible. When moving this back to panel-edp, I wasn't sure what to use for .bus_flags and .bus_format and whether to add the extra "enable" delay from commit 88d3457ceb82 ("drm/panel: auo,b116xw03: fix flash backlight when power on"). I've added formats/flags/delays based on my (inexpert) analysis of the datasheet. These are untested. NOTE: if/when this is backported to stable, we might run into some trouble. Specifically, before 474c162878ba ("arm64: dts: mt8183: jacuzzi: Move panel under aux-bus") this panel was used by "mt8183-kukui-jacuzzi", which assumed it was an eDP panel. I don't know what to suggest for that other than someone making up a bogus panel for jacuzzi that's just for the stable channel. Fixes: 88d3457ceb82 ("drm/panel: auo,b116xw03: fix flash backlight when power on") Fixes: 5f04e7ce392d ("drm/panel-edp: Split eDP panels out of panel-simple") Tested-by: Anton Bambura Acked-by: Hsin-Yi Wang Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20230925150010.1.Iff672233861bcc4cf25a7ad0a81308adc3bda8a4@changeid --- drivers/gpu/drm/panel/panel-edp.c | 29 ----------------------- drivers/gpu/drm/panel/panel-simple.c | 35 ++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index feb665df35a1d9..95c8472d878a99 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -976,32 +976,6 @@ static const struct panel_desc auo_b116xak01 = { }, }; -static const struct drm_display_mode auo_b116xw03_mode = { - .clock = 70589, - .hdisplay = 1366, - .hsync_start = 1366 + 40, - .hsync_end = 1366 + 40 + 40, - .htotal = 1366 + 40 + 40 + 32, - .vdisplay = 768, - .vsync_start = 768 + 10, - .vsync_end = 768 + 10 + 12, - .vtotal = 768 + 10 + 12 + 6, - .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC, -}; - -static const struct panel_desc auo_b116xw03 = { - .modes = &auo_b116xw03_mode, - .num_modes = 1, - .bpc = 6, - .size = { - .width = 256, - .height = 144, - }, - .delay = { - .enable = 400, - }, -}; - static const struct drm_display_mode auo_b133han05_mode = { .clock = 142600, .hdisplay = 1920, @@ -1725,9 +1699,6 @@ static const struct of_device_id platform_of_match[] = { }, { .compatible = "auo,b116xa01", .data = &auo_b116xak01, - }, { - .compatible = "auo,b116xw03", - .data = &auo_b116xw03, }, { .compatible = "auo,b133han05", .data = &auo_b133han05, diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 95959dcc6e0e2c..dd7928d9570f72 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -919,6 +919,38 @@ static const struct panel_desc auo_b101xtn01 = { }, }; +static const struct drm_display_mode auo_b116xw03_mode = { + .clock = 70589, + .hdisplay = 1366, + .hsync_start = 1366 + 40, + .hsync_end = 1366 + 40 + 40, + .htotal = 1366 + 40 + 40 + 32, + .vdisplay = 768, + .vsync_start = 768 + 10, + .vsync_end = 768 + 10 + 12, + .vtotal = 768 + 10 + 12 + 6, + .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC, +}; + +static const struct panel_desc auo_b116xw03 = { + .modes = &auo_b116xw03_mode, + .num_modes = 1, + .bpc = 6, + .size = { + .width = 256, + .height = 144, + }, + .delay = { + .prepare = 1, + .enable = 200, + .disable = 200, + .unprepare = 500, + }, + .bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, + .connector_type = DRM_MODE_CONNECTOR_LVDS, +}; + static const struct display_timing auo_g070vvn01_timings = { .pixelclock = { 33300000, 34209000, 45000000 }, .hactive = { 800, 800, 800 }, @@ -4102,6 +4134,9 @@ static const struct of_device_id platform_of_match[] = { }, { .compatible = "auo,b101xtn01", .data = &auo_b101xtn01, + }, { + .compatible = "auo,b116xw03", + .data = &auo_b116xw03, }, { .compatible = "auo,g070vvn01", .data = &auo_g070vvn01, From 1531309aa2092a96c092fa662863ffa53da3ba93 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 12 Oct 2023 12:04:28 +0100 Subject: [PATCH 154/661] soc: renesas: Make ARCH_R9A07G043 depend on required options Randy reported a randconfig build issue against linux-next: WARNING: unmet direct dependencies detected for ERRATA_ANDES Depends on [n]: RISCV_ALTERNATIVE [=n] && RISCV_SBI [=y] Selected by [y]: - ARCH_R9A07G043 [=y] && SOC_RENESAS [=y] && RISCV [=y] && NONPORTABLE [=y] && RISCV_SBI [=y] ../arch/riscv/errata/andes/errata.c:59:54: warning: 'struct alt_entry' declared inside parameter list will not be visible outside of this definition or declaration 59 | void __init_or_module andes_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, On RISC-V, alternatives are not usable in XIP kernels, which this randconfig happened to select. Rather than add a check for whether alternatives are available before selecting the ERRATA_ANDES config option, rework the R9A07G043 Kconfig entry to depend on the configuration options required to support its non-standard cache coherency implementation. Without these options enabled, the SoC is effectively non-functional to begin with, so there's an extra benefit in preventing the creation of non-functional kernels. The "if RISCV_DMA_NONCOHERENT" can be dropped, as ERRATA_ANDES_CMO will select it. Reported-by: Randy Dunlap Closes: https://lore.kernel.org/all/09a6b0f0-76a1-45e3-ab52-329c47393d1d@infradead.org/ Signed-off-by: Conor Dooley Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20231012-pouch-parkway-7d26c04b3300@spud Signed-off-by: Geert Uytterhoeven --- drivers/soc/renesas/Kconfig | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/soc/renesas/Kconfig b/drivers/soc/renesas/Kconfig index 12040ce116a551..93f42c159ad477 100644 --- a/drivers/soc/renesas/Kconfig +++ b/drivers/soc/renesas/Kconfig @@ -334,12 +334,13 @@ if RISCV config ARCH_R9A07G043 bool "RISC-V Platform support for RZ/Five" depends on NONPORTABLE + depends on RISCV_ALTERNATIVE + depends on RISCV_SBI select ARCH_RZG2L - select AX45MP_L2_CACHE if RISCV_DMA_NONCOHERENT + select AX45MP_L2_CACHE select DMA_GLOBAL_POOL - select ERRATA_ANDES if RISCV_SBI - select ERRATA_ANDES_CMO if ERRATA_ANDES - + select ERRATA_ANDES + select ERRATA_ANDES_CMO help This enables support for the Renesas RZ/Five SoC. From f2cab4b318ee8023f4ad640b906ae268942a7db4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 8 Oct 2023 07:02:31 -0700 Subject: [PATCH 155/661] drm/nouveau: exec: fix ioctl kernel-doc warning kernel-doc emits a warning: include/uapi/drm/nouveau_drm.h:49: warning: Cannot understand * @NOUVEAU_GETPARAM_EXEC_PUSH_MAX on line 49 - I thought it was a doc line We don't have a way to document a macro value via kernel-doc, so change the "/**" kernel-doc marker to a C comment and format the comment more like a kernel-doc comment for consistency. Fixes: d59e75eef52d ("drm/nouveau: exec: report max pushs through getparam") Signed-off-by: Randy Dunlap Cc: Dave Airlie Cc: Danilo Krummrich Cc: Karol Herbst Cc: Lyude Paul Cc: dri-devel@lists.freedesktop.org Cc: nouveau@lists.freedesktop.org Cc: Bragatheswaran Manickavel Reviewed-by: Lyude Paul Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20231008140231.17921-1-rdunlap@infradead.org --- include/uapi/drm/nouveau_drm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index eaf9f248619f35..0bade1592f34f2 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -45,8 +45,8 @@ extern "C" { #define NOUVEAU_GETPARAM_HAS_BO_USAGE 15 #define NOUVEAU_GETPARAM_HAS_PAGEFLIP 16 -/** - * @NOUVEAU_GETPARAM_EXEC_PUSH_MAX +/* + * NOUVEAU_GETPARAM_EXEC_PUSH_MAX - query max pushes through getparam * * Query the maximum amount of IBs that can be pushed through a single * &drm_nouveau_exec structure and hence a single &DRM_IOCTL_NOUVEAU_EXEC From 8698cb92eeece1e326a4d6a051bcf143037c4d31 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 8 Aug 2023 14:21:30 +0300 Subject: [PATCH 156/661] net/mlx5: Perform DMA operations in the right locations The cited patch change mlx5 driver so that during probe DMA operations were performed before pci_enable_device(), and during teardown DMA operations were performed after pci_disable_device(). DMA operations require PCI to be enabled. Hence, The above leads to the following oops in PPC systems[1]. On s390x systems, as reported by Niklas Schnelle, this is a problem because mlx5_pci_init() is where the DMA and coherent mask is set but mlx5_cmd_init() already does a dma_alloc_coherent(). Thus a DMA allocation is done during probe before the correct mask is set. This causes probe to fail initialization of the cmdif SW structs on s390x after that is converted to the common dma-iommu code. This is because on s390x DMA addresses below 4 GiB are reserved on current machines and unlike the old s390x specific DMA API implementation common code enforces DMA masks. Fix it by performing the DMA operations during probe after pci_enable_device() and after the dma mask is set, and during teardown before pci_disable_device(). [1] Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: xt_MASQUERADE nf_conntrack_netlink nfnetlink xfrm_user iptable_nat xt_addrtype xt_conntrack nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 netconsole rpcsec_gss_krb5 auth_rpcgss oid_registry overlay rpcrdma rdma_ucm ib_iser ib_umad rdma_cm ib_ipoib iw_cm libiscsi scsi_transport_iscsi ib_cm ib_uverbs ib_core mlx5_core(-) ptp pps_core fuse vmx_crypto crc32c_vpmsum [last unloaded: mlx5_ib] CPU: 1 PID: 8937 Comm: modprobe Not tainted 6.5.0-rc3_for_upstream_min_debug_2023_07_31_16_02 #1 Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,HEAD hv:linux,kvm pSeries NIP: c000000000423388 LR: c0000000001e733c CTR: c0000000001e4720 REGS: c0000000055636d0 TRAP: 0380 Not tainted (6.5.0-rc3_for_upstream_min_debug_2023_07_31_16_02) MSR: 8000000000009033 CR: 24008884 XER: 20040000 CFAR: c0000000001e7338 IRQMASK: 0 NIP [c000000000423388] __free_pages+0x28/0x160 LR [c0000000001e733c] dma_direct_free+0xac/0x190 Call Trace: [c000000005563970] [5deadbeef0000100] 0x5deadbeef0000100 (unreliable) [c0000000055639b0] [c0000000003d46cc] kfree+0x7c/0x150 [c000000005563a40] [c0000000001e47c8] dma_free_attrs+0xa8/0x1a0 [c000000005563aa0] [c008000000d0064c] mlx5_cmd_cleanup+0xa4/0x100 [mlx5_core] [c000000005563ad0] [c008000000cf629c] mlx5_mdev_uninit+0xf4/0x140 [mlx5_core] [c000000005563b00] [c008000000cf6448] remove_one+0x160/0x1d0 [mlx5_core] [c000000005563b40] [c000000000958540] pci_device_remove+0x60/0x110 [c000000005563b80] [c000000000a35e80] device_remove+0x70/0xd0 [c000000005563bb0] [c000000000a37a38] device_release_driver_internal+0x2a8/0x330 [c000000005563c00] [c000000000a37b8c] driver_detach+0x8c/0x160 [c000000005563c40] [c000000000a35350] bus_remove_driver+0x90/0x110 [c000000005563c80] [c000000000a38948] driver_unregister+0x48/0x90 [c000000005563cf0] [c000000000957e38] pci_unregister_driver+0x38/0x150 [c000000005563d40] [c008000000eb6140] mlx5_cleanup+0x38/0x90 [mlx5_core] Fixes: 06cd555f73ca ("net/mlx5: split mlx5_cmd_init() to probe and reload routines") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Reviewed-by: Tariq Toukan Reviewed-by: Leon Romanovsky Reviewed-by: Niklas Schnelle Tested-by: Niklas Schnelle Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 64 ++++++++----------- 1 file changed, 28 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index afb348579577c5..c22b0ad0c8701d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -2186,52 +2186,23 @@ static u16 cmdif_rev(struct mlx5_core_dev *dev) int mlx5_cmd_init(struct mlx5_core_dev *dev) { - int size = sizeof(struct mlx5_cmd_prot_block); - int align = roundup_pow_of_two(size); struct mlx5_cmd *cmd = &dev->cmd; - u32 cmd_l; - int err; - - cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0); - if (!cmd->pool) - return -ENOMEM; - err = alloc_cmd_page(dev, cmd); - if (err) - goto err_free_pool; - - cmd_l = (u32)(cmd->dma); - if (cmd_l & 0xfff) { - mlx5_core_err(dev, "invalid command queue address\n"); - err = -ENOMEM; - goto err_cmd_page; - } cmd->checksum_disabled = 1; spin_lock_init(&cmd->alloc_lock); spin_lock_init(&cmd->token_lock); - create_msg_cache(dev); - set_wqname(dev); cmd->wq = create_singlethread_workqueue(cmd->wq_name); if (!cmd->wq) { mlx5_core_err(dev, "failed to create command workqueue\n"); - err = -ENOMEM; - goto err_cache; + return -ENOMEM; } mlx5_cmdif_debugfs_init(dev); return 0; - -err_cache: - destroy_msg_cache(dev); -err_cmd_page: - free_cmd_page(dev, cmd); -err_free_pool: - dma_pool_destroy(cmd->pool); - return err; } void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) @@ -2240,15 +2211,15 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) mlx5_cmdif_debugfs_cleanup(dev); destroy_workqueue(cmd->wq); - destroy_msg_cache(dev); - free_cmd_page(dev, cmd); - dma_pool_destroy(cmd->pool); } int mlx5_cmd_enable(struct mlx5_core_dev *dev) { + int size = sizeof(struct mlx5_cmd_prot_block); + int align = roundup_pow_of_two(size); struct mlx5_cmd *cmd = &dev->cmd; u32 cmd_h, cmd_l; + int err; memset(&cmd->vars, 0, sizeof(cmd->vars)); cmd->vars.cmdif_rev = cmdif_rev(dev); @@ -2281,10 +2252,21 @@ int mlx5_cmd_enable(struct mlx5_core_dev *dev) sema_init(&cmd->vars.pages_sem, 1); sema_init(&cmd->vars.throttle_sem, DIV_ROUND_UP(cmd->vars.max_reg_cmds, 2)); + cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0); + if (!cmd->pool) + return -ENOMEM; + + err = alloc_cmd_page(dev, cmd); + if (err) + goto err_free_pool; + cmd_h = (u32)((u64)(cmd->dma) >> 32); cmd_l = (u32)(cmd->dma); - if (WARN_ON(cmd_l & 0xfff)) - return -EINVAL; + if (cmd_l & 0xfff) { + mlx5_core_err(dev, "invalid command queue address\n"); + err = -ENOMEM; + goto err_cmd_page; + } iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h); iowrite32be(cmd_l, &dev->iseg->cmdq_addr_l_sz); @@ -2297,17 +2279,27 @@ int mlx5_cmd_enable(struct mlx5_core_dev *dev) cmd->mode = CMD_MODE_POLLING; cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL; + create_msg_cache(dev); create_debugfs_files(dev); return 0; + +err_cmd_page: + free_cmd_page(dev, cmd); +err_free_pool: + dma_pool_destroy(cmd->pool); + return err; } void mlx5_cmd_disable(struct mlx5_core_dev *dev) { struct mlx5_cmd *cmd = &dev->cmd; - clean_debug_files(dev); flush_workqueue(cmd->wq); + clean_debug_files(dev); + destroy_msg_cache(dev); + free_cmd_page(dev, cmd); + dma_pool_destroy(cmd->pool); } void mlx5_cmd_set_state(struct mlx5_core_dev *dev, From 7624e58a8b3a251e3e5108b32f2183b34453db32 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 27 Aug 2023 13:31:53 +0300 Subject: [PATCH 157/661] net/mlx5: E-switch, register event handler before arming the event Currently, mlx5 is registering event handler for vport context change event some time after arming the event. this can lead to missing an event, which will result in wrong rules in the FDB. Hence, register the event handler before arming the event. This solution is valid since FW is sending vport context change event only on vports which SW armed, and SW arming the vport when enabling it, which is done after the FDB has been created. Fixes: 6933a9379559 ("net/mlx5: E-Switch, Use async events chain") Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d4cde655506323..8d0b915a31214e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1038,11 +1038,8 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) return ERR_PTR(err); } -static void mlx5_eswitch_event_handlers_register(struct mlx5_eswitch *esw) +static void mlx5_eswitch_event_handler_register(struct mlx5_eswitch *esw) { - MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); - mlx5_eq_notifier_register(esw->dev, &esw->nb); - if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) { MLX5_NB_INIT(&esw->esw_funcs.nb, mlx5_esw_funcs_changed_handler, ESW_FUNCTIONS_CHANGED); @@ -1050,13 +1047,11 @@ static void mlx5_eswitch_event_handlers_register(struct mlx5_eswitch *esw) } } -static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw) +static void mlx5_eswitch_event_handler_unregister(struct mlx5_eswitch *esw) { if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb); - mlx5_eq_notifier_unregister(esw->dev, &esw->nb); - flush_workqueue(esw->work_queue); } @@ -1483,6 +1478,9 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) mlx5_eswitch_update_num_of_vfs(esw, num_vfs); + MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); + mlx5_eq_notifier_register(esw->dev, &esw->nb); + if (esw->mode == MLX5_ESWITCH_LEGACY) { err = esw_legacy_enable(esw); } else { @@ -1495,7 +1493,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) esw->fdb_table.flags |= MLX5_ESW_FDB_CREATED; - mlx5_eswitch_event_handlers_register(esw); + mlx5_eswitch_event_handler_register(esw); esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", @@ -1622,7 +1620,8 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw) */ mlx5_esw_mode_change_notify(esw, MLX5_ESWITCH_LEGACY); - mlx5_eswitch_event_handlers_unregister(esw); + mlx5_eq_notifier_unregister(esw->dev, &esw->nb); + mlx5_eswitch_event_handler_unregister(esw); esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", From 7a3ce8074878a68a75ceacec93d9ae05906eec86 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 9 Aug 2023 11:10:57 +0200 Subject: [PATCH 158/661] net/mlx5: Bridge, fix peer entry ageing in LAG mode With current implementation in single FDB LAG mode all packets are processed by eswitch 0 rules. As such, 'peer' FDB entries receive the packets for rules of other eswitches and are responsible for updating the main entry by sending SWITCHDEV_FDB_ADD_TO_BRIDGE notification from their background update wq task. However, this introduces a race condition when non-zero eswitch instance decides to delete a FDB entry, sends SWITCHDEV_FDB_DEL_TO_BRIDGE notification, but another eswitch's update task refreshes the same entry concurrently while its async delete work is still pending on the workque. In such case another SWITCHDEV_FDB_ADD_TO_BRIDGE event may be generated and entry will remain stuck in FDB marked as 'offloaded' since no more SWITCHDEV_FDB_DEL_TO_BRIDGE notifications are sent for deleting the peer entries. Fix the issue by synchronously marking deleted entries with MLX5_ESW_BRIDGE_FLAG_DELETED flag and skipping them in background update job. Signed-off-by: Vlad Buslov Reviewed-by: Jianbo Liu Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en/rep/bridge.c | 11 ++++++++ .../ethernet/mellanox/mlx5/core/esw/bridge.c | 25 ++++++++++++++++++- .../ethernet/mellanox/mlx5/core/esw/bridge.h | 3 +++ .../mellanox/mlx5/core/esw/bridge_priv.h | 1 + 4 files changed, 39 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index 0fef853eab6265..5d128c5b4529af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -467,6 +467,17 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, /* only handle the event on peers */ if (mlx5_esw_bridge_is_local(dev, rep, esw)) break; + + fdb_info = container_of(info, + struct switchdev_notifier_fdb_info, + info); + /* Mark for deletion to prevent the update wq task from + * spuriously refreshing the entry which would mark it again as + * offloaded in SW bridge. After this fallthrough to regular + * async delete code. + */ + mlx5_esw_bridge_fdb_mark_deleted(dev, vport_num, esw_owner_vhca_id, br_offloads, + fdb_info); fallthrough; case SWITCHDEV_FDB_ADD_TO_DEVICE: case SWITCHDEV_FDB_DEL_TO_DEVICE: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index e36294b7ade274..1b9bc32efd6fa9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -1748,6 +1748,28 @@ void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 entry->lastuse = jiffies; } +void mlx5_esw_bridge_fdb_mark_deleted(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info) +{ + struct mlx5_esw_bridge_fdb_entry *entry; + struct mlx5_esw_bridge *bridge; + + bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!bridge) + return; + + entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid); + if (!entry) { + esw_debug(br_offloads->esw->dev, + "FDB mark deleted entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n", + fdb_info->addr, fdb_info->vid, vport_num); + return; + } + + entry->flags |= MLX5_ESW_BRIDGE_FLAG_DELETED; +} + void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct switchdev_notifier_fdb_info *fdb_info) @@ -1810,7 +1832,8 @@ void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads) unsigned long lastuse = (unsigned long)mlx5_fc_query_lastuse(entry->ingress_counter); - if (entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER) + if (entry->flags & (MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER | + MLX5_ESW_BRIDGE_FLAG_DELETED)) continue; if (time_after(lastuse, entry->lastuse)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h index c2c7c70d99eb70..d6f5391619930d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h @@ -62,6 +62,9 @@ int mlx5_esw_bridge_vport_peer_unlink(struct net_device *br_netdev, u16 vport_nu void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct switchdev_notifier_fdb_info *fdb_info); +void mlx5_esw_bridge_fdb_mark_deleted(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info); void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct switchdev_notifier_fdb_info *fdb_info); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h index 4911cc32161b42..7c251af566c6fa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h @@ -133,6 +133,7 @@ struct mlx5_esw_bridge_mdb_key { enum { MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0), MLX5_ESW_BRIDGE_FLAG_PEER = BIT(1), + MLX5_ESW_BRIDGE_FLAG_DELETED = BIT(2), }; enum { From 92fd39634541eb0a11bf1bafbc8ba92d6ddb8dba Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Wed, 6 Sep 2023 21:48:30 +0300 Subject: [PATCH 159/661] net/mlx5: Handle fw tracer change ownership event based on MTRC Currently, whenever fw issues a change ownership event, the PF that owns the fw tracer drops its ownership directly and the other PFs try to pick up the ownership via what MTRC register suggests. In some cases, driver releases the ownership of the tracer and reacquires it later on. Whenever the driver releases ownership of the tracer, fw issues a change ownership event. This event can be delayed and come after driver has reacquired ownership of the tracer. Thus the late event will trigger the tracer owner PF to release the ownership again and lead to a scenario where no PF is owning the tracer. To prevent the scenario described above, when handling a change ownership event, do not drop ownership of the tracer directly, instead read the fw MTRC register to retrieve the up-to-date owner of the tracer and set it accordingly in driver level. Fixes: f53aaa31cce7 ("net/mlx5: FW tracer, implement tracer logic") Signed-off-by: Maher Sanalla Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 7c0f2adbea000d..ad789349c06e6b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -848,7 +848,7 @@ static void mlx5_fw_tracer_ownership_change(struct work_struct *work) mlx5_core_dbg(tracer->dev, "FWTracer: ownership changed, current=(%d)\n", tracer->owner); if (tracer->owner) { - tracer->owner = false; + mlx5_fw_tracer_ownership_acquire(tracer); return; } From be43b7489a3c4702799e50179da69c3df7d6899b Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 2 Oct 2023 14:05:29 +0300 Subject: [PATCH 160/661] net/mlx5e: RX, Fix page_pool allocation failure recovery for striding rq When a page allocation fails during refill in mlx5e_post_rx_mpwqes, the page will be released again on the next refill call. This triggers the page_pool negative page fragment count warning below: [ 2436.447717] WARNING: CPU: 1 PID: 2419 at include/net/page_pool/helpers.h:130 mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] ... [ 2436.447895] RIP: 0010:mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 2436.447991] Call Trace: [ 2436.447975] mlx5e_post_rx_mpwqes+0x1d5/0xcf0 [mlx5_core] [ 2436.447994] [ 2436.447996] ? __warn+0x7d/0x120 [ 2436.448009] ? mlx5e_handle_rx_cqe_mpwrq+0x109/0x1d0 [mlx5_core] [ 2436.448002] ? mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 2436.448044] ? mlx5e_poll_rx_cq+0x87/0x6e0 [mlx5_core] [ 2436.448061] ? report_bug+0x155/0x180 [ 2436.448065] ? handle_bug+0x36/0x70 [ 2436.448067] ? exc_invalid_op+0x13/0x60 [ 2436.448070] ? asm_exc_invalid_op+0x16/0x20 [ 2436.448079] mlx5e_napi_poll+0x122/0x6b0 [mlx5_core] [ 2436.448077] ? mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 2436.448113] ? generic_exec_single+0x35/0x100 [ 2436.448117] __napi_poll+0x25/0x1a0 [ 2436.448120] net_rx_action+0x28a/0x300 [ 2436.448122] __do_softirq+0xcd/0x279 [ 2436.448126] irq_exit_rcu+0x6a/0x90 [ 2436.448128] sysvec_apic_timer_interrupt+0x6e/0x90 [ 2436.448130] This patch fixes the striding rq case by setting the skip flag on all the wqe pages that were expected to have new pages allocated. Fixes: 4c2a13236807 ("net/mlx5e: RX, Defer page release in striding rq for better recycling") Tested-by: Chris Mason Reported-by: Chris Mason Closes: https://lore.kernel.org/netdev/117FF31A-7BE0-4050-B2BB-E41F224FF72F@meta.com Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 3fd11b0761e09e..7988b3a9598c9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -816,6 +816,8 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) mlx5e_page_release_fragmented(rq, frag_page); } + bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); + err: rq->stats->buff_alloc_err++; From ef9369e9c30846f5e052a11ccc70e1f6b8dc557a Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Fri, 29 Sep 2023 17:31:49 +0300 Subject: [PATCH 161/661] net/mlx5e: RX, Fix page_pool allocation failure recovery for legacy rq When a page allocation fails during refill in mlx5e_refill_rx_wqes, the page will be released again on the next refill call. This triggers the page_pool negative page fragment count warning below: [ 338.326070] WARNING: CPU: 4 PID: 0 at include/net/page_pool/helpers.h:130 mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] ... [ 338.328993] RIP: 0010:mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 338.329094] Call Trace: [ 338.329097] [ 338.329100] ? __warn+0x7d/0x120 [ 338.329105] ? mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 338.329173] ? report_bug+0x155/0x180 [ 338.329179] ? handle_bug+0x3c/0x60 [ 338.329183] ? exc_invalid_op+0x13/0x60 [ 338.329187] ? asm_exc_invalid_op+0x16/0x20 [ 338.329192] ? mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 338.329259] mlx5e_post_rx_wqes+0x210/0x5a0 [mlx5_core] [ 338.329327] ? mlx5e_poll_rx_cq+0x88/0x6f0 [mlx5_core] [ 338.329394] mlx5e_napi_poll+0x127/0x6b0 [mlx5_core] [ 338.329461] __napi_poll+0x25/0x1a0 [ 338.329465] net_rx_action+0x28a/0x300 [ 338.329468] __do_softirq+0xcd/0x279 [ 338.329473] irq_exit_rcu+0x6a/0x90 [ 338.329477] common_interrupt+0x82/0xa0 [ 338.329482] This patch fixes the legacy rq case by releasing all allocated fragments and then setting the skip flag on all released fragments. It is important to note that the number of released fragments will be higher than the number of allocated fragments when an allocation error occurs. Fixes: 3f93f82988bc ("net/mlx5e: RX, Defer page release in legacy rq for better recycling") Tested-by: Chris Mason Reported-by: Chris Mason Closes: https://lore.kernel.org/netdev/117FF31A-7BE0-4050-B2BB-E41F224FF72F@meta.com Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 33 ++++++++++++++----- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 7988b3a9598c9e..8d9743a5e42c7c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -457,26 +457,41 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) static int mlx5e_refill_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) { int remaining = wqe_bulk; - int i = 0; + int total_alloc = 0; + int refill_alloc; + int refill; /* The WQE bulk is split into smaller bulks that are sized * according to the page pool cache refill size to avoid overflowing * the page pool cache due to too many page releases at once. */ do { - int refill = min_t(u16, rq->wqe.info.refill_unit, remaining); - int alloc_count; + refill = min_t(u16, rq->wqe.info.refill_unit, remaining); - mlx5e_free_rx_wqes(rq, ix + i, refill); - alloc_count = mlx5e_alloc_rx_wqes(rq, ix + i, refill); - i += alloc_count; - if (unlikely(alloc_count != refill)) - break; + mlx5e_free_rx_wqes(rq, ix + total_alloc, refill); + refill_alloc = mlx5e_alloc_rx_wqes(rq, ix + total_alloc, refill); + if (unlikely(refill_alloc != refill)) + goto err_free; + total_alloc += refill_alloc; remaining -= refill; } while (remaining); - return i; + return total_alloc; + +err_free: + mlx5e_free_rx_wqes(rq, ix, total_alloc + refill_alloc); + + for (int i = 0; i < total_alloc + refill; i++) { + int j = mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, ix + i); + struct mlx5e_wqe_frag_info *frag; + + frag = get_frag(rq, j); + for (int k = 0; k < rq->wqe.info.num_frags; k++, frag++) + frag->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + } + + return 0; } static void From aaab619ccd07a32e5b29aa7e59b20de1dcc7a29e Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 25 Sep 2023 17:50:18 +0300 Subject: [PATCH 162/661] net/mlx5e: XDP, Fix XDP_REDIRECT mpwqe page fragment leaks on shutdown When mlx5e_xdp_xmit is called without the XDP_XMIT_FLUSH set it is possible that it leaves a mpwqe session open. That is ok during runtime: the session will be closed on the next call to mlx5e_xdp_xmit. But having a mpwqe session still open at XDP sq close time is problematic: the pc counter is not updated before flushing the contents of the xdpi_fifo. This results in leaking page fragments. The fix is to always close the mpwqe session at the end of mlx5e_xdp_xmit, regardless of the XDP_XMIT_FLUSH flag being set or not. Fixes: 5e0d2eef771e ("net/mlx5e: XDP, Support Enhanced Multi-Packet TX WQE") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 12f56d0db0af2f..8bed17d8fe5649 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -874,11 +874,11 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, } out: - if (flags & XDP_XMIT_FLUSH) { - if (sq->mpwqe.wqe) - mlx5e_xdp_mpwqe_complete(sq); + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + + if (flags & XDP_XMIT_FLUSH) mlx5e_xmit_xdp_doorbell(sq); - } return nxmit; } From c51c673462a266fb813cf189f8190798a12d3124 Mon Sep 17 00:00:00 2001 From: Lama Kayal Date: Tue, 12 Sep 2023 10:06:24 +0300 Subject: [PATCH 163/661] net/mlx5e: Take RTNL lock before triggering netdev notifiers Hold RTNL lock when calling xdp_set_features() with a registered netdev, as the call triggers the netdev notifiers. This could happen when switching from nic profile to uplink representor for example. Similar logic which fixed a similar scenario was previously introduced in the following commit: commit 72cc65497065 net/mlx5e: Take RTNL lock when needed before calling xdp_set_features(). This fixes the following assertion and warning call trace: RTNL: assertion failed at net/core/dev.c (1961) WARNING: CPU: 13 PID: 2529 at net/core/dev.c:1961 call_netdevice_notifiers_info+0x7c/0x80 Modules linked in: rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay mlx5_core zram zsmalloc fuse CPU: 13 PID: 2529 Comm: devlink Not tainted 6.5.0_for_upstream_min_debug_2023_09_07_20_04 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:call_netdevice_notifiers_info+0x7c/0x80 Code: 8f ff 80 3d 77 0d 16 01 00 75 c5 ba a9 07 00 00 48 c7 c6 c4 bb 0d 82 48 c7 c7 18 c8 06 82 c6 05 5b 0d 16 01 01 e8 44 f6 8c ff <0f> 0b eb a2 0f 1f 44 00 00 55 48 89 e5 41 54 48 83 e4 f0 48 83 ec RSP: 0018:ffff88819930f7f0 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffffffff8309f740 RCX: 0000000000000027 RDX: ffff88885fb5b5c8 RSI: 0000000000000001 RDI: ffff88885fb5b5c0 RBP: 0000000000000028 R08: ffff88887ffabaa8 R09: 0000000000000003 R10: ffff88887fecbac0 R11: ffff88887ff7bac0 R12: ffff88819930f810 R13: ffff88810b7fea40 R14: ffff8881154e8fd8 R15: ffff888107e881a0 FS: 00007f3ad248f800(0000) GS:ffff88885fb40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000563b85f164e0 CR3: 0000000113b5c006 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? __warn+0x79/0x120 ? call_netdevice_notifiers_info+0x7c/0x80 ? report_bug+0x17c/0x190 ? handle_bug+0x3c/0x60 ? exc_invalid_op+0x14/0x70 ? asm_exc_invalid_op+0x16/0x20 ? call_netdevice_notifiers_info+0x7c/0x80 call_netdevice_notifiers+0x2e/0x50 mlx5e_set_xdp_feature+0x21/0x50 [mlx5_core] mlx5e_build_rep_params+0x97/0x130 [mlx5_core] mlx5e_init_ul_rep+0x9f/0x100 [mlx5_core] mlx5e_netdev_init_profile+0x76/0x110 [mlx5_core] mlx5e_netdev_attach_profile+0x1f/0x90 [mlx5_core] mlx5e_netdev_change_profile+0x92/0x160 [mlx5_core] mlx5e_vport_rep_load+0x329/0x4a0 [mlx5_core] mlx5_esw_offloads_rep_load+0x9e/0xf0 [mlx5_core] esw_offloads_enable+0x4bc/0xe90 [mlx5_core] mlx5_eswitch_enable_locked+0x3c8/0x570 [mlx5_core] ? kmalloc_trace+0x25/0x80 mlx5_devlink_eswitch_mode_set+0x224/0x680 [mlx5_core] ? devlink_get_from_attrs_lock+0x9e/0x110 devlink_nl_cmd_eswitch_set_doit+0x60/0xe0 genl_family_rcv_msg_doit+0xd0/0x120 genl_rcv_msg+0x180/0x2b0 ? devlink_get_from_attrs_lock+0x110/0x110 ? devlink_nl_cmd_eswitch_get_doit+0x290/0x290 ? devlink_pernet_pre_exit+0xf0/0xf0 ? genl_family_rcv_msg_dumpit+0xf0/0xf0 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1fc/0x2c0 netlink_sendmsg+0x232/0x4a0 sock_sendmsg+0x38/0x60 ? _copy_from_user+0x2a/0x60 __sys_sendto+0x110/0x160 ? handle_mm_fault+0x161/0x260 ? do_user_addr_fault+0x276/0x620 __x64_sys_sendto+0x20/0x30 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f3ad231340a Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7e c3 0f 1f 44 00 00 41 54 48 83 ec 30 44 89 RSP: 002b:00007ffd70aad4b8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000c36b00 RCX:00007f3ad231340a RDX: 0000000000000038 RSI: 0000000000c36b00 RDI: 0000000000000003 RBP: 0000000000c36910 R08: 00007f3ad2625200 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 ---[ end trace 0000000000000000 ]--- ------------[ cut here ]------------ Fixes: 4d5ab0ad964d ("net/mlx5e: take into account device reconfiguration for xdp_features flag") Signed-off-by: Lama Kayal Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 2fdb8895aecd7b..5ca9bc337dc6ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -769,6 +769,7 @@ static int mlx5e_rep_max_nch_limit(struct mlx5_core_dev *mdev) static void mlx5e_build_rep_params(struct net_device *netdev) { + const bool take_rtnl = netdev->reg_state == NETREG_REGISTERED; struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; @@ -794,8 +795,15 @@ static void mlx5e_build_rep_params(struct net_device *netdev) /* RQ */ mlx5e_build_rq_params(mdev, params); + /* If netdev is already registered (e.g. move from nic profile to uplink, + * RTNL lock must be held before triggering netdev notifiers. + */ + if (take_rtnl) + rtnl_lock(); /* update XDP supported features */ mlx5e_set_xdp_feature(netdev); + if (take_rtnl) + rtnl_unlock(); /* CQ moderation params */ params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); From 06b4eac9c4beda520b8a4dbbb8e33dba9d1c8fba Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 12 Sep 2023 02:28:47 +0000 Subject: [PATCH 164/661] net/mlx5e: Don't offload internal port if filter device is out device In the cited commit, if the routing device is ovs internal port, the out device is set to uplink, and packets go out after encapsulation. If filter device is uplink, it can trigger the following syndrome: mlx5_core 0000:08:00.0: mlx5_cmd_out_err:803:(pid 3966): SET_FLOW_TABLE_ENTRY(0x936) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0xcdb051), err(-22) Fix this issue by not offloading internal port if filter device is out device. In this case, packets are not forwarded to the root table to be processed, the termination table is used instead to forward them from uplink to uplink. Fixes: 100ad4e2d758 ("net/mlx5e: Offload internal port as encap route device") Signed-off-by: Jianbo Liu Reviewed-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 1730f6a716eeab..b10e40e1a9c141 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -24,7 +24,8 @@ static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv, route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex); - if (!route_dev || !netif_is_ovs_master(route_dev)) + if (!route_dev || !netif_is_ovs_master(route_dev) || + attr->parse_attr->filter_dev == e->out_dev) goto out; err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex, From 80f1241484dd1b1d4eab1a0211d52ec2bd83e2f1 Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Mon, 4 Sep 2023 18:26:47 +0300 Subject: [PATCH 165/661] net/mlx5e: Fix VF representors reporting zero counters to "ip -s" command Although vf_vport entry of struct mlx5e_stats is never updated, its values are mistakenly copied to the caller structure in the VF representor .ndo_get_stat_64 callback mlx5e_rep_get_stats(). Remove redundant entry and use the updated one, rep_stats, instead. Fixes: 64b68e369649 ("net/mlx5: Refactor and expand rep vport stat group") Reviewed-by: Patrisious Haddad Signed-off-by: Amir Tzin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 11 ++++++++++- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 +++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 5ca9bc337dc6ae..fd1cce542b680f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -701,7 +701,7 @@ mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) /* update HW stats in background for next time */ mlx5e_queue_update_stats(priv); - memcpy(stats, &priv->stats.vf_vport, sizeof(*stats)); + mlx5e_stats_copy_rep_stats(stats, &priv->stats.rep_stats); } static int mlx5e_rep_change_mtu(struct net_device *netdev, int new_mtu) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 176fa5976259fb..477c547dcc04a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -484,11 +484,20 @@ struct mlx5e_stats { struct mlx5e_vnic_env_stats vnic; struct mlx5e_vport_stats vport; struct mlx5e_pport_stats pport; - struct rtnl_link_stats64 vf_vport; struct mlx5e_pcie_stats pcie; struct mlx5e_rep_stats rep_stats; }; +static inline void mlx5e_stats_copy_rep_stats(struct rtnl_link_stats64 *vf_vport, + struct mlx5e_rep_stats *rep_stats) +{ + memset(vf_vport, 0, sizeof(*vf_vport)); + vf_vport->rx_packets = rep_stats->vport_rx_packets; + vf_vport->tx_packets = rep_stats->vport_tx_packets; + vf_vport->rx_bytes = rep_stats->vport_rx_bytes; + vf_vport->tx_bytes = rep_stats->vport_tx_bytes; +} + extern mlx5e_stats_grp_t mlx5e_nic_stats_grps[]; unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index c24828b688ac0a..c8590483ddc64b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -4972,7 +4972,8 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, if (err) return err; - rpriv->prev_vf_vport_stats = priv->stats.vf_vport; + mlx5e_stats_copy_rep_stats(&rpriv->prev_vf_vport_stats, + &priv->stats.rep_stats); break; default: NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall"); @@ -5012,7 +5013,7 @@ void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, u64 dbytes; u64 dpkts; - cur_stats = priv->stats.vf_vport; + mlx5e_stats_copy_rep_stats(&cur_stats, &priv->stats.rep_stats); dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets; dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; rpriv->prev_vf_vport_stats = cur_stats; From d873a364ef2182af40110869f9c62813ce6f9386 Mon Sep 17 00:00:00 2001 From: Ammar Faizi Date: Wed, 30 Aug 2023 08:02:23 +0700 Subject: [PATCH 166/661] tools/nolibc: i386: Fix a stack misalign bug on _start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ABI mandates that the %esp register must be a multiple of 16 when executing a 'call' instruction. Commit 2ab446336b17 ("tools/nolibc: i386: shrink _start with _start_c") simplified the _start function, but it didn't take care of the %esp alignment, causing SIGSEGV on SSE and AVX programs that use aligned move instruction (e.g., movdqa, movaps, and vmovdqa). The 'and $-16, %esp' aligns the %esp at a multiple of 16. Then 'push %eax' will subtract the %esp by 4; thus, it breaks the 16-byte alignment. Make sure the %esp is correctly aligned after the push by subtracting 12 before the push. Extra: Add 'add $12, %esp' before the 'and $-16, %esp' to avoid over-estimating for particular cases as suggested by Willy. A test program to validate the %esp alignment on _start can be found at: https://lore.kernel.org/lkml/ZOoindMFj1UKqo+s@biznet-home.integral.gnuweeb.org [ Thomas: trim Fixes tag commit id ] Cc: Zhangjin Wu Fixes: 2ab446336b17 ("tools/nolibc: i386: shrink _start with _start_c") Reported-by: Nicholas Rosenberg Acked-by: Thomas Weißschuh Signed-off-by: Ammar Faizi Reviewed-by: Alviro Iskandar Setiawan Signed-off-by: Willy Tarreau Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/arch-i386.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h index 64415b9fac77f9..28c26a00a7625f 100644 --- a/tools/include/nolibc/arch-i386.h +++ b/tools/include/nolibc/arch-i386.h @@ -167,7 +167,9 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_ __asm__ volatile ( "xor %ebp, %ebp\n" /* zero the stack frame */ "mov %esp, %eax\n" /* save stack pointer to %eax, as arg1 of _start_c */ - "and $-16, %esp\n" /* last pushed argument must be 16-byte aligned */ + "add $12, %esp\n" /* avoid over-estimating after the 'and' & 'sub' below */ + "and $-16, %esp\n" /* the %esp must be 16-byte aligned on 'call' */ + "sub $12, %esp\n" /* sub 12 to keep it aligned after the push %eax */ "push %eax\n" /* push arg1 on stack to support plain stack modes too */ "call _start_c\n" /* transfer to c runtime */ "hlt\n" /* ensure it does not return */ From 513bd2788e4994f6187d22b2fd0d25c3cfbeb87a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 16 Sep 2023 00:08:54 +0200 Subject: [PATCH 167/661] MAINTAINERS: nolibc: update tree location MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The nolibc tree moved out of Willys user namespace into its own. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20230916-nolibc-tree-v1-1-06c9b59a5035@weissschuh.net --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 90f13281d29708..6c83087ea396bd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15131,7 +15131,7 @@ NOLIBC HEADER FILE M: Willy Tarreau M: Thomas Weißschuh S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/wtarreau/nolibc.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/nolibc/linux-nolibc.git F: tools/include/nolibc/ F: tools/testing/selftests/nolibc/ From 921992229b1f06df6b649860e4a5f3def1489866 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 12 Oct 2023 00:37:38 +0200 Subject: [PATCH 168/661] tools/nolibc: mark start_c as weak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise the different instances of _start_c from each compilation unit will lead to linker errors: /usr/bin/ld: /tmp/ccSNvRqs.o: in function `_start_c': nolibc-test-foo.c:(.text.nolibc_memset+0x9): multiple definition of `_start_c'; /tmp/ccG25101.o:nolibc-test.c:(.text+0x1ea3): first defined here Fixes: 17336755150b ("tools/nolibc: add new crt.h with _start_c") Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/lkml/20231012-nolibc-start_c-multiple-v1-1-fbfc73e0283f@weissschuh.net/ Link: https://lore.kernel.org/lkml/20231012-nolibc-linkage-test-v1-1-315e682768b4@weissschuh.net/ Acked-by: Willy Tarreau --- tools/include/nolibc/crt.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h index a5f33fef167204..a05655b4ce1d7f 100644 --- a/tools/include/nolibc/crt.h +++ b/tools/include/nolibc/crt.h @@ -13,6 +13,7 @@ const unsigned long *_auxv __attribute__((weak)); static void __stack_chk_init(void); static void exit(int); +__attribute__((weak)) void _start_c(long *sp) { long argc; From 4366faf43308bd91c59a20c43a9f853a9c3bb6e4 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Wed, 11 Oct 2023 13:41:34 +0200 Subject: [PATCH 169/661] drm/nouveau/disp: fix DP capable DSM connectors Just special case DP DSM connectors until we properly figure out how to deal with this. This resolves user regressions on GPUs with such connectors without reverting the original fix. Cc: Lyude Paul Cc: stable@vger.kernel.org # 6.4+ Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/255 Fixes: 2b5d1c29f6c4 ("drm/nouveau/disp: PIOR DP uses GPIO for HPD, not PMGR AUX interrupts") Signed-off-by: Karol Herbst Reviewed-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20231011114134.861818-1-kherbst@redhat.com --- drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c index 46b057fe1412ec..3249e5c1c89308 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c @@ -62,6 +62,18 @@ nvkm_uconn_uevent_gpio(struct nvkm_object *object, u64 token, u32 bits) return object->client->event(token, &args, sizeof(args.v0)); } +static bool +nvkm_connector_is_dp_dms(u8 type) +{ + switch (type) { + case DCB_CONNECTOR_DMS59_DP0: + case DCB_CONNECTOR_DMS59_DP1: + return true; + default: + return false; + } +} + static int nvkm_uconn_uevent(struct nvkm_object *object, void *argv, u32 argc, struct nvkm_uevent *uevent) { @@ -101,7 +113,7 @@ nvkm_uconn_uevent(struct nvkm_object *object, void *argv, u32 argc, struct nvkm_ if (args->v0.types & NVIF_CONN_EVENT_V0_UNPLUG) bits |= NVKM_GPIO_LO; if (args->v0.types & NVIF_CONN_EVENT_V0_IRQ) { /* TODO: support DP IRQ on ANX9805 and remove this hack. */ - if (!outp->info.location) + if (!outp->info.location && !nvkm_connector_is_dp_dms(conn->info.type)) return -EINVAL; } From 17bfcd6a81535d7d580ddafb6e54806890aaca6c Mon Sep 17 00:00:00 2001 From: Wedson Almeida Filho Date: Sat, 30 Sep 2023 11:49:58 -0300 Subject: [PATCH 170/661] rust: error: fix the description for `ECHILD` A mistake was made and the description of `ECHILD` is wrong (it reuses the description of `ENOEXEC`). This fixes it to reflect what's in `errno-base.h`. Signed-off-by: Wedson Almeida Filho Reviewed-by: Martin Rodriguez Reboredo Reviewed-by: Trevor Gross Reviewed-by: Finn Behrens Reviewed-by: Alice Ryhl Fixes: 266def2a0f5b ("rust: error: add codes from `errno-base.h`") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230930144958.46051-1-wedsonaf@gmail.com [ Use the plural, as noticed by Benno. ] Signed-off-by: Miguel Ojeda --- rust/kernel/error.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index 05fcab6abfe63e..0f29e7b2194c0a 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -37,7 +37,7 @@ pub mod code { declare_err!(E2BIG, "Argument list too long."); declare_err!(ENOEXEC, "Exec format error."); declare_err!(EBADF, "Bad file number."); - declare_err!(ECHILD, "Exec format error."); + declare_err!(ECHILD, "No child processes."); declare_err!(EAGAIN, "Try again."); declare_err!(ENOMEM, "Out of memory."); declare_err!(EACCES, "Permission denied."); From 2a7e0a52ec98566a863aba42ea35690c65e3da27 Mon Sep 17 00:00:00 2001 From: Manmohan Shukla Date: Thu, 7 Sep 2023 02:18:57 +0530 Subject: [PATCH 171/661] rust: error: Markdown style nit This patch fixes a trivial markdown style nit in the `SAFETY` comment. Signed-off-by: Manmohan Shukla Reviewed-by: Alice Ryhl Reviewed-by: Jianguo Bao Reviewed-by: Vincenzo Palazzo Reviewed-by: Finn Behrens Reviewed-by: Benno Lossin Reviewed-by: Andreas Hindborg Fixes: c7e20faa5fca ("rust: error: Add Error::to_ptr()") Link: https://lore.kernel.org/r/20230906204857.85619-1-manmshuk@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/error.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index 0f29e7b2194c0a..032b6454395395 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -133,7 +133,7 @@ impl Error { /// Returns the error encoded as a pointer. #[allow(dead_code)] pub(crate) fn to_ptr(self) -> *mut T { - // SAFETY: self.0 is a valid error due to its invariant. + // SAFETY: `self.0` is a valid error due to its invariant. unsafe { bindings::ERR_PTR(self.0.into()) as *mut _ } } From 344b6c0a7514b044ed12b8ad3cdeecd262292f3e Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Tue, 15 Aug 2023 08:53:46 +0200 Subject: [PATCH 172/661] rust: fix bindgen build error with fstrict-flex-arrays Commit df8fc4e934c1 ("kbuild: Enable -fstrict-flex-arrays=3") enabled '-fstrict-flex-arrays=3' globally, but bindgen does not recognized this compiler option, triggering the following build error: error: unknown argument: '-fstrict-flex-arrays=3', err: true [ Miguel: Commit df8fc4e934c1 ("kbuild: Enable -fstrict-flex-arrays=3") did it so only conditionally (i.e. only if the C compiler supports it). This explains what Andrea was seeing: he was compiling with a modern enough GCC, which enables the option, but with an old enough Clang. Andrea confirmed this was the case: he was using Clang 14 with GCC 13; and that Clang 15 worked for him. While it is possible to construct code (see mailing list for an example I came up with) where this could break, it is fairly contrived, and anyway GCC-built kernels with Rust enabled should only be used for experimentation until we get support for `rustc_codegen_gcc` and/or GCC Rust. So let's add this for the time being in case it helps somebody. ] Add '-fstrict-flex-arrays' to the list of cflags that should be ignored by bindgen. Fixes: df8fc4e934c1 ("kbuild: Enable -fstrict-flex-arrays=3") Signed-off-by: Andrea Righi Tested-by: Gary Guo Link: https://lore.kernel.org/r/20230815065346.131387-1-andrea.righi@canonical.com Signed-off-by: Miguel Ojeda --- rust/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/Makefile b/rust/Makefile index 87958e864be025..1e78c82a18a87d 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -290,6 +290,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ -fno-reorder-blocks -fno-allow-store-data-races -fasan-shadow-offset=% \ -fzero-call-used-regs=% -fno-stack-clash-protection \ -fno-inline-functions-called-once -fsanitize=bounds-strict \ + -fstrict-flex-arrays=% \ --param=% --param asan-% # Derived from `scripts/Makefile.clang`. From 601cb6d573facde5fc88efa935b074da64ae63c9 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 12 Oct 2023 10:37:29 +0200 Subject: [PATCH 173/661] clk: socfpga: gate: Account for the divider in determine_rate Commit 9607beb917df ("clk: socfpga: gate: Add a determine_rate hook") added a determine_rate implementation set to the clk_hw_determine_rate_no_reparent, but failed to account for the internal divider that wasn't used before anywhere but in recalc_rate. This led to inconsistencies between the clock rate stored in clk_core->rate and the one returned by clk_round_rate() that leverages determine_rate(). Since that driver seems to be widely used (and thus regression-prone) and not supporting rate changes (since it's missing a .set_rate implementation), we can just report the current divider programmed in the clock but not try to change it in any way. This should be good enough to fix the issues reported, and if someone ever wants to allow the divider to change then it should be easy enough using the clk-divider helpers. Link: https://lore.kernel.org/linux-clk/20231005095927.12398-2-b.spranger@linutronix.de/ Fixes: 9607beb917df ("clk: socfpga: gate: Add a determine_rate hook") Reported-by: Benedikt Spranger Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20231012083729.2148044-1-mripard@kernel.org [sboyd@kernel.org: Fix hw -> hwclk] Signed-off-by: Stephen Boyd --- drivers/clk/socfpga/clk-gate.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/clk/socfpga/clk-gate.c b/drivers/clk/socfpga/clk-gate.c index 8dd601bd85389f..0a5a95e0267ff4 100644 --- a/drivers/clk/socfpga/clk-gate.c +++ b/drivers/clk/socfpga/clk-gate.c @@ -87,10 +87,8 @@ static int socfpga_clk_set_parent(struct clk_hw *hwclk, u8 parent) return 0; } -static unsigned long socfpga_clk_recalc_rate(struct clk_hw *hwclk, - unsigned long parent_rate) +static u32 socfpga_clk_get_div(struct socfpga_gate_clk *socfpgaclk) { - struct socfpga_gate_clk *socfpgaclk = to_socfpga_gate_clk(hwclk); u32 div = 1, val; if (socfpgaclk->fixed_div) @@ -105,12 +103,33 @@ static unsigned long socfpga_clk_recalc_rate(struct clk_hw *hwclk, div = (1 << val); } + return div; +} + +static unsigned long socfpga_clk_recalc_rate(struct clk_hw *hwclk, + unsigned long parent_rate) +{ + struct socfpga_gate_clk *socfpgaclk = to_socfpga_gate_clk(hwclk); + u32 div = socfpga_clk_get_div(socfpgaclk); + return parent_rate / div; } + +static int socfpga_clk_determine_rate(struct clk_hw *hwclk, + struct clk_rate_request *req) +{ + struct socfpga_gate_clk *socfpgaclk = to_socfpga_gate_clk(hwclk); + u32 div = socfpga_clk_get_div(socfpgaclk); + + req->rate = req->best_parent_rate / div; + + return 0; +} + static struct clk_ops gateclk_ops = { .recalc_rate = socfpga_clk_recalc_rate, - .determine_rate = clk_hw_determine_rate_no_reparent, + .determine_rate = socfpga_clk_determine_rate, .get_parent = socfpga_clk_get_parent, .set_parent = socfpga_clk_set_parent, }; From ceb87a361d0b079ecbc7d2831618c19087f304a9 Mon Sep 17 00:00:00 2001 From: Alessandro Carminati Date: Thu, 21 Sep 2023 07:32:17 +0000 Subject: [PATCH 174/661] clk: Sanitize possible_parent_show to Handle Return Value of of_clk_get_parent_name In the possible_parent_show function, ensure proper handling of the return value from of_clk_get_parent_name to prevent potential issues arising from a NULL return. The current implementation invokes seq_puts directly on the result of of_clk_get_parent_name without verifying the return value, which can lead to kernel panic if the function returns NULL. This patch addresses the concern by introducing a check on the return value of of_clk_get_parent_name. If the return value is not NULL, the function proceeds to call seq_puts, providing the returned value as argument. However, if of_clk_get_parent_name returns NULL, the function provides a static string as argument, avoiding the panic. Fixes: 1ccc0ddf046a ("clk: Use seq_puts() in possible_parent_show()") Reported-by: Philip Daly Signed-off-by: Alessandro Carminati (Red Hat) Link: https://lore.kernel.org/r/20230921073217.572151-1-alessandro.carminati@gmail.com Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index c249f9791ae864..473563bc74960f 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -3416,6 +3416,7 @@ static void possible_parent_show(struct seq_file *s, struct clk_core *core, unsigned int i, char terminator) { struct clk_core *parent; + const char *name = NULL; /* * Go through the following options to fetch a parent's name. @@ -3430,18 +3431,20 @@ static void possible_parent_show(struct seq_file *s, struct clk_core *core, * registered (yet). */ parent = clk_core_get_parent_by_index(core, i); - if (parent) + if (parent) { seq_puts(s, parent->name); - else if (core->parents[i].name) + } else if (core->parents[i].name) { seq_puts(s, core->parents[i].name); - else if (core->parents[i].fw_name) + } else if (core->parents[i].fw_name) { seq_printf(s, "<%s>(fw)", core->parents[i].fw_name); - else if (core->parents[i].index >= 0) - seq_puts(s, - of_clk_get_parent_name(core->of_node, - core->parents[i].index)); - else - seq_puts(s, "(missing)"); + } else { + if (core->parents[i].index >= 0) + name = of_clk_get_parent_name(core->of_node, core->parents[i].index); + if (!name) + name = "(missing)"; + + seq_puts(s, name); + } seq_putc(s, terminator); } From 790437bbe0ef7e5cb5d091dd711c0d61d03945a5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 10 Oct 2023 16:35:28 +0300 Subject: [PATCH 175/661] clk: stm32: Fix a signedness issue in clk_stm32_composite_determine_rate() The divider_ro_round_rate() function could potentially return -EINVAL on error but the error handling doesn't work because "rate" is unsigned. It should be a type long. Fixes: 06ed0fc0fbac ("clk: stm32: composite: Switch to determine_rate") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/d9a78453-9b40-48c1-830e-00751ba3ecb8@kili.mountain Acked-by: Maxime Ripard Signed-off-by: Stephen Boyd --- drivers/clk/stm32/clk-stm32-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/stm32/clk-stm32-core.c b/drivers/clk/stm32/clk-stm32-core.c index d5aa09e9fce4c2..067b918a889456 100644 --- a/drivers/clk/stm32/clk-stm32-core.c +++ b/drivers/clk/stm32/clk-stm32-core.c @@ -431,7 +431,7 @@ static int clk_stm32_composite_determine_rate(struct clk_hw *hw, { struct clk_stm32_composite *composite = to_clk_stm32_composite(hw); const struct stm32_div_cfg *divider; - unsigned long rate; + long rate; if (composite->div_id == NO_STM32_DIV) return 0; From de5724ca38fd5e442bae9c1fab31942b6544012d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Oct 2023 10:24:29 +0000 Subject: [PATCH 176/661] xfrm: fix a data-race in xfrm_lookup_with_ifid() syzbot complains about a race in xfrm_lookup_with_ifid() [1] When preparing commit 0a9e5794b21e ("xfrm: annotate data-race around use_time") I thought xfrm_lookup_with_ifid() was modifying a still private structure. [1] BUG: KCSAN: data-race in xfrm_lookup_with_ifid / xfrm_lookup_with_ifid write to 0xffff88813ea41108 of 8 bytes by task 8150 on cpu 1: xfrm_lookup_with_ifid+0xce7/0x12d0 net/xfrm/xfrm_policy.c:3218 xfrm_lookup net/xfrm/xfrm_policy.c:3270 [inline] xfrm_lookup_route+0x3b/0x100 net/xfrm/xfrm_policy.c:3281 ip6_dst_lookup_flow+0x98/0xc0 net/ipv6/ip6_output.c:1246 send6+0x241/0x3c0 drivers/net/wireguard/socket.c:139 wg_socket_send_skb_to_peer+0xbd/0x130 drivers/net/wireguard/socket.c:178 wg_socket_send_buffer_to_peer+0xd6/0x100 drivers/net/wireguard/socket.c:200 wg_packet_send_handshake_initiation drivers/net/wireguard/send.c:40 [inline] wg_packet_handshake_send_worker+0x10c/0x150 drivers/net/wireguard/send.c:51 process_one_work kernel/workqueue.c:2630 [inline] process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703 worker_thread+0x525/0x730 kernel/workqueue.c:2784 kthread+0x1d7/0x210 kernel/kthread.c:388 ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 write to 0xffff88813ea41108 of 8 bytes by task 15867 on cpu 0: xfrm_lookup_with_ifid+0xce7/0x12d0 net/xfrm/xfrm_policy.c:3218 xfrm_lookup net/xfrm/xfrm_policy.c:3270 [inline] xfrm_lookup_route+0x3b/0x100 net/xfrm/xfrm_policy.c:3281 ip6_dst_lookup_flow+0x98/0xc0 net/ipv6/ip6_output.c:1246 send6+0x241/0x3c0 drivers/net/wireguard/socket.c:139 wg_socket_send_skb_to_peer+0xbd/0x130 drivers/net/wireguard/socket.c:178 wg_socket_send_buffer_to_peer+0xd6/0x100 drivers/net/wireguard/socket.c:200 wg_packet_send_handshake_initiation drivers/net/wireguard/send.c:40 [inline] wg_packet_handshake_send_worker+0x10c/0x150 drivers/net/wireguard/send.c:51 process_one_work kernel/workqueue.c:2630 [inline] process_scheduled_works+0x5b8/0xa30 kernel/workqueue.c:2703 worker_thread+0x525/0x730 kernel/workqueue.c:2784 kthread+0x1d7/0x210 kernel/kthread.c:388 ret_from_fork+0x48/0x60 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:304 value changed: 0x00000000651cd9d1 -> 0x00000000651cd9d2 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 15867 Comm: kworker/u4:58 Not tainted 6.6.0-rc4-syzkaller-00016-g5e62ed3b1c8a #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/06/2023 Workqueue: wg-kex-wg2 wg_packet_handshake_send_worker Fixes: 0a9e5794b21e ("xfrm: annotate data-race around use_time") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Steffen Klassert Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 99df2862bdc951..d24b4d4f620ea0 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3220,7 +3220,7 @@ struct dst_entry *xfrm_lookup_with_ifid(struct net *net, } for (i = 0; i < num_pols; i++) - pols[i]->curlft.use_time = ktime_get_real_seconds(); + WRITE_ONCE(pols[i]->curlft.use_time, ktime_get_real_seconds()); if (num_xfrms < 0) { /* Prohibit the flow */ From 5447da5d610b5701c1103cd4665b49da87fdf032 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Wed, 11 Oct 2023 19:49:55 +0200 Subject: [PATCH 177/661] ARM: OMAP1: ams-delta: Fix MODEM initialization failure Regulator drivers were modified to use asynchronous device probe. Since then, the board .init_late hook fails to acquire a GPIO based fixed regulator needed by an on-board voice MODEM device, and unregisters the MODEM. That in turn triggers a so far not discovered bug of device unregister function called for a device with no associated release() op. serial8250 serial8250.1: incomplete constraints, dummy supplies not allowed WARNING: CPU: 0 PID: 1 at drivers/base/core.c:2486 device_release+0x98/0xa8 Device 'serial8250.1' does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst. ... put_device from platform_device_put+0x1c/0x24 platform_device_put from ams_delta_init_late+0x4c/0x68 ams_delta_init_late from init_machine_late+0x1c/0x94 init_machine_late from do_one_initcall+0x60/0x1d4 As a consequence, ASoC CODEC driver is no longer able to control its device over the voice MODEM's tty interface. cx20442-codec: ASoC: error at soc_component_write_no_lock on cx20442-codec for register: [0x00000000] -5 cx20442-codec: ASoC: error at snd_soc_component_update_bits_legacy on cx20442-codec for register: [0x00000000] -5 cx20442-codec: ASoC: error at snd_soc_component_update_bits on cx20442-codec for register: [0x00000000] -5 The regulator hangs of a GPIO pin controlled by basic-mmio-gpio driver. Unlike most GPIO drivers, that driver doesn't probe for devices before device_initcall, then GPIO pins under its control are not availabele to majority of devices probed at that phase, including regulators. On the other hand, serial8250 driver used by the MODEM device neither accepts via platform data nor handles regulators, then the board file is not able to teach that driver to return -EPROBE_DEFER when the regulator is not ready so the failed probe is retried after late_initcall. Resolve the issue by extending description of the MODEM device with a dedicated power management domain. Acquire the regulator from the domain's .activate hook and return -EPROBE_DEFER if the regulator is not available. Having that under control, add the regulator device description to the list of platform devices initialized from .init_machine and drop the no longer needed custom .init_late hook. v2: Trim down the warning for prettier git log output (Tony). Fixes: 259b93b21a9f ("regulator: Set PROBE_PREFER_ASYNCHRONOUS for drivers that existed in 4.14") Signed-off-by: Janusz Krzysztofik Cc: stable@vger.kernel.org # v6.4+ Message-ID: <20231011175038.1907629-1-jmkrzyszt@gmail.com> Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/board-ams-delta.c | 60 +++++++-------------------- 1 file changed, 16 insertions(+), 44 deletions(-) diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index 9808cd27e2cf93..67de96c7717db1 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -550,6 +550,7 @@ static struct platform_device *ams_delta_devices[] __initdata = { &ams_delta_nand_device, &ams_delta_lcd_device, &cx20442_codec_device, + &modem_nreset_device, }; static struct gpiod_lookup_table *ams_delta_gpio_tables[] __initdata = { @@ -782,26 +783,28 @@ static struct plat_serial8250_port ams_delta_modem_ports[] = { { }, }; +static int ams_delta_modem_pm_activate(struct device *dev) +{ + modem_priv.regulator = regulator_get(dev, "RESET#"); + if (IS_ERR(modem_priv.regulator)) + return -EPROBE_DEFER; + + return 0; +} + +static struct dev_pm_domain ams_delta_modem_pm_domain = { + .activate = ams_delta_modem_pm_activate, +}; + static struct platform_device ams_delta_modem_device = { .name = "serial8250", .id = PLAT8250_DEV_PLATFORM1, .dev = { .platform_data = ams_delta_modem_ports, + .pm_domain = &ams_delta_modem_pm_domain, }, }; -static int __init modem_nreset_init(void) -{ - int err; - - err = platform_device_register(&modem_nreset_device); - if (err) - pr_err("Couldn't register the modem regulator device\n"); - - return err; -} - - /* * This function expects MODEM IRQ number already assigned to the port. * The MODEM device requires its RESET# pin kept high during probe. @@ -833,37 +836,6 @@ static int __init ams_delta_modem_init(void) } arch_initcall_sync(ams_delta_modem_init); -static int __init late_init(void) -{ - int err; - - err = modem_nreset_init(); - if (err) - return err; - - /* - * Once the modem device is registered, the modem_nreset - * regulator can be requested on behalf of that device. - */ - modem_priv.regulator = regulator_get(&ams_delta_modem_device.dev, - "RESET#"); - if (IS_ERR(modem_priv.regulator)) { - err = PTR_ERR(modem_priv.regulator); - goto unregister; - } - return 0; - -unregister: - platform_device_unregister(&ams_delta_modem_device); - return err; -} - -static void __init ams_delta_init_late(void) -{ - omap1_init_late(); - late_init(); -} - static void __init ams_delta_map_io(void) { omap1_map_io(); @@ -877,7 +849,7 @@ MACHINE_START(AMS_DELTA, "Amstrad E3 (Delta)") .init_early = omap1_init_early, .init_irq = omap1_init_irq, .init_machine = ams_delta_init, - .init_late = ams_delta_init_late, + .init_late = omap1_init_late, .init_time = omap1_timer_init, .restart = omap1_restart, MACHINE_END From cc2d819dd7df94a72bde7b9b9331a6535084092d Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 11 Oct 2023 10:15:56 +0300 Subject: [PATCH 178/661] clk: ti: Fix missing omap4 mcbsp functional clock and aliases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are using a wrong mcbsp functional clock. The interconnect target module driver provided clock for mcbsp is not same as the mcbsp functional clock known as the gfclk main_clk. The mcbsp functional clocks for mcbsp should have been added before we dropped the legacy platform data. Additionally we are also missing the clock aliases for the clocks used by the audio driver if reparenting is needed. This causes audio driver errors like "CLKS: could not clk_get() prcm_fck" for mcbsp as reported by Andreas. The mcbsp clock aliases too should have been added before we dropped the legacy platform data. Let's add the clocks and aliases with a single patch to fix the issue. Fixes: 349355ce3a05 ("ARM: OMAP2+: Drop legacy platform data for omap4 mcbsp") Reported-by: Andreas Kemnade Reported-by: Péter Ujfalusi Acked-by: Stephen Boyd Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/ti/omap/omap4-l4-abe.dtsi | 6 ++++++ arch/arm/boot/dts/ti/omap/omap4-l4.dtsi | 2 ++ drivers/clk/ti/clk-44xx.c | 5 +++++ 3 files changed, 13 insertions(+) diff --git a/arch/arm/boot/dts/ti/omap/omap4-l4-abe.dtsi b/arch/arm/boot/dts/ti/omap/omap4-l4-abe.dtsi index 7ae8b620515c54..59f546a278f87c 100644 --- a/arch/arm/boot/dts/ti/omap/omap4-l4-abe.dtsi +++ b/arch/arm/boot/dts/ti/omap/omap4-l4-abe.dtsi @@ -109,6 +109,8 @@ reg = <0x0 0xff>, /* MPU private access */ <0x49022000 0xff>; /* L3 Interconnect */ reg-names = "mpu", "dma"; + clocks = <&abe_clkctrl OMAP4_MCBSP1_CLKCTRL 24>; + clock-names = "fck"; interrupts = ; interrupt-names = "common"; ti,buffer-size = <128>; @@ -142,6 +144,8 @@ reg = <0x0 0xff>, /* MPU private access */ <0x49024000 0xff>; /* L3 Interconnect */ reg-names = "mpu", "dma"; + clocks = <&abe_clkctrl OMAP4_MCBSP2_CLKCTRL 24>; + clock-names = "fck"; interrupts = ; interrupt-names = "common"; ti,buffer-size = <128>; @@ -175,6 +179,8 @@ reg = <0x0 0xff>, /* MPU private access */ <0x49026000 0xff>; /* L3 Interconnect */ reg-names = "mpu", "dma"; + clocks = <&abe_clkctrl OMAP4_MCBSP3_CLKCTRL 24>; + clock-names = "fck"; interrupts = ; interrupt-names = "common"; ti,buffer-size = <128>; diff --git a/arch/arm/boot/dts/ti/omap/omap4-l4.dtsi b/arch/arm/boot/dts/ti/omap/omap4-l4.dtsi index 46b8f9efd41316..3fcef3080eaec8 100644 --- a/arch/arm/boot/dts/ti/omap/omap4-l4.dtsi +++ b/arch/arm/boot/dts/ti/omap/omap4-l4.dtsi @@ -2043,6 +2043,8 @@ compatible = "ti,omap4-mcbsp"; reg = <0x0 0xff>; /* L4 Interconnect */ reg-names = "mpu"; + clocks = <&l4_per_clkctrl OMAP4_MCBSP4_CLKCTRL 24>; + clock-names = "fck"; interrupts = ; interrupt-names = "common"; ti,buffer-size = <128>; diff --git a/drivers/clk/ti/clk-44xx.c b/drivers/clk/ti/clk-44xx.c index 868bc7af21b0b3..9b2824ed785b99 100644 --- a/drivers/clk/ti/clk-44xx.c +++ b/drivers/clk/ti/clk-44xx.c @@ -749,9 +749,14 @@ static struct ti_dt_clk omap44xx_clks[] = { DT_CLK(NULL, "mcbsp1_sync_mux_ck", "abe-clkctrl:0028:26"), DT_CLK(NULL, "mcbsp2_sync_mux_ck", "abe-clkctrl:0030:26"), DT_CLK(NULL, "mcbsp3_sync_mux_ck", "abe-clkctrl:0038:26"), + DT_CLK("40122000.mcbsp", "prcm_fck", "abe-clkctrl:0028:26"), + DT_CLK("40124000.mcbsp", "prcm_fck", "abe-clkctrl:0030:26"), + DT_CLK("40126000.mcbsp", "prcm_fck", "abe-clkctrl:0038:26"), DT_CLK(NULL, "mcbsp4_sync_mux_ck", "l4-per-clkctrl:00c0:26"), + DT_CLK("48096000.mcbsp", "prcm_fck", "l4-per-clkctrl:00c0:26"), DT_CLK(NULL, "ocp2scp_usb_phy_phy_48m", "l3-init-clkctrl:00c0:8"), DT_CLK(NULL, "otg_60m_gfclk", "l3-init-clkctrl:0040:24"), + DT_CLK(NULL, "pad_fck", "pad_clks_ck"), DT_CLK(NULL, "per_mcbsp4_gfclk", "l4-per-clkctrl:00c0:24"), DT_CLK(NULL, "pmd_stm_clock_mux_ck", "emu-sys-clkctrl:0000:20"), DT_CLK(NULL, "pmd_trace_clk_mux_ck", "emu-sys-clkctrl:0000:22"), From 0b9a4a67c60d3e15b39a69d480a50ce7eeff9bc1 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 11 Oct 2023 10:15:56 +0300 Subject: [PATCH 179/661] clk: ti: Fix missing omap5 mcbsp functional clock and aliases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are using a wrong mcbsp functional clock. The interconnect target module driver provided clock for mcbsp is not same as the mcbsp functional clock known as the gfclk main_clk. The mcbsp functional clocks for mcbsp should have been added before we dropped the legacy platform data. Additionally we are also missing the clock aliases for the clocks used by the audio driver if reparenting is needed. This causes audio driver errors like "CLKS: could not clk_get() prcm_fck" for mcbsp as reported by Andreas. The mcbsp clock aliases too should have been added before we dropped the legacy platform data. Let's add the clocks and aliases with a single patch to fix the issue similar to omap4. On omap5, there is no mcbsp4 instance on the l4_per interconnect. Fixes: b1da0fa21bd1 ("ARM: OMAP2+: Drop legacy platform data for omap5 mcbsp") Cc: H. Nikolaus Schaller Reported-by: Andreas Kemnade Reported-by: Péter Ujfalusi Acked-by: Stephen Boyd Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/ti/omap/omap5-l4-abe.dtsi | 6 ++++++ drivers/clk/ti/clk-54xx.c | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/arch/arm/boot/dts/ti/omap/omap5-l4-abe.dtsi b/arch/arm/boot/dts/ti/omap/omap5-l4-abe.dtsi index a03bca5a358441..97b0c3b5f573f7 100644 --- a/arch/arm/boot/dts/ti/omap/omap5-l4-abe.dtsi +++ b/arch/arm/boot/dts/ti/omap/omap5-l4-abe.dtsi @@ -109,6 +109,8 @@ reg = <0x0 0xff>, /* MPU private access */ <0x49022000 0xff>; /* L3 Interconnect */ reg-names = "mpu", "dma"; + clocks = <&abe_clkctrl OMAP5_MCBSP1_CLKCTRL 24>; + clock-names = "fck"; interrupts = ; interrupt-names = "common"; ti,buffer-size = <128>; @@ -142,6 +144,8 @@ reg = <0x0 0xff>, /* MPU private access */ <0x49024000 0xff>; /* L3 Interconnect */ reg-names = "mpu", "dma"; + clocks = <&abe_clkctrl OMAP5_MCBSP2_CLKCTRL 24>; + clock-names = "fck"; interrupts = ; interrupt-names = "common"; ti,buffer-size = <128>; @@ -175,6 +179,8 @@ reg = <0x0 0xff>, /* MPU private access */ <0x49026000 0xff>; /* L3 Interconnect */ reg-names = "mpu", "dma"; + clocks = <&abe_clkctrl OMAP5_MCBSP3_CLKCTRL 24>; + clock-names = "fck"; interrupts = ; interrupt-names = "common"; ti,buffer-size = <128>; diff --git a/drivers/clk/ti/clk-54xx.c b/drivers/clk/ti/clk-54xx.c index b4aff76eb3735e..74dfd5823f835b 100644 --- a/drivers/clk/ti/clk-54xx.c +++ b/drivers/clk/ti/clk-54xx.c @@ -565,15 +565,19 @@ static struct ti_dt_clk omap54xx_clks[] = { DT_CLK(NULL, "gpio8_dbclk", "l4per-clkctrl:00f8:8"), DT_CLK(NULL, "mcbsp1_gfclk", "abe-clkctrl:0028:24"), DT_CLK(NULL, "mcbsp1_sync_mux_ck", "abe-clkctrl:0028:26"), + DT_CLK("40122000.mcbsp", "prcm_fck", "abe-clkctrl:0028:26"), DT_CLK(NULL, "mcbsp2_gfclk", "abe-clkctrl:0030:24"), DT_CLK(NULL, "mcbsp2_sync_mux_ck", "abe-clkctrl:0030:26"), + DT_CLK("40124000.mcbsp", "prcm_fck", "abe-clkctrl:0030:26"), DT_CLK(NULL, "mcbsp3_gfclk", "abe-clkctrl:0038:24"), DT_CLK(NULL, "mcbsp3_sync_mux_ck", "abe-clkctrl:0038:26"), + DT_CLK("40126000.mcbsp", "prcm_fck", "abe-clkctrl:0038:26"), DT_CLK(NULL, "mmc1_32khz_clk", "l3init-clkctrl:0008:8"), DT_CLK(NULL, "mmc1_fclk", "l3init-clkctrl:0008:25"), DT_CLK(NULL, "mmc1_fclk_mux", "l3init-clkctrl:0008:24"), DT_CLK(NULL, "mmc2_fclk", "l3init-clkctrl:0010:25"), DT_CLK(NULL, "mmc2_fclk_mux", "l3init-clkctrl:0010:24"), + DT_CLK(NULL, "pad_fck", "pad_clks_ck"), DT_CLK(NULL, "sata_ref_clk", "l3init-clkctrl:0068:8"), DT_CLK(NULL, "timer10_gfclk_mux", "l4per-clkctrl:0008:24"), DT_CLK(NULL, "timer11_gfclk_mux", "l4per-clkctrl:0010:24"), From 6a7be48e9bd18d309ba25c223a27790ad1bf0fa3 Mon Sep 17 00:00:00 2001 From: Fabio Porcedda Date: Tue, 5 Sep 2023 09:37:24 +0200 Subject: [PATCH 180/661] USB: serial: option: add Telit LE910C4-WWX 0x1035 composition Add support for the following Telit LE910C4-WWX composition: 0x1035: TTY, TTY, ECM T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=1035 Rev=00.00 S: Manufacturer=Telit S: Product=LE910C4-WWX S: SerialNumber=e1b117c7 C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 1 Cls=02(commc) Sub=06 Prot=00 Driver=cdc_ether E: Ad=85(I) Atr=03(Int.) MxPS= 64 Ivl=2ms I: If#= 3 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Fabio Porcedda Cc: stable@vger.kernel.org Reviewed-by: Daniele Palmas Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 7994a4549a6c80..f612bc9129bd42 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1290,6 +1290,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1033, 0xff), /* Telit LE910C1-EUX (ECM) */ .driver_info = NCTRL(0) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1035, 0xff) }, /* Telit LE910C4-WWX (ECM) */ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG0), .driver_info = RSVD(0) | RSVD(1) | NCTRL(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG1), From 064f6e2ba9eb59b2c87b866e1e968e79ccedf9dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Monin?= Date: Mon, 2 Oct 2023 17:51:40 +0200 Subject: [PATCH 181/661] USB: serial: option: add entry for Sierra EM9191 with new firmware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Following a firmware update of the modem, the interface for the AT command port changed, so add it back. T: Bus=08 Lev=01 Prnt=01 Port=01 Cnt=02 Dev#= 2 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=1199 ProdID=90d3 Rev=00.06 S: Manufacturer=Sierra Wireless, Incorporated S: Product=Sierra Wireless EM9191 S: SerialNumber=xxxxxxxxxxxxxxxx C: #Ifs= 4 Cfg#= 1 Atr=a0 MxPwr=896mA I: If#=0x0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#=0x1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I: If#=0x3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=(none) I: If#=0x4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option Signed-off-by: Benoît Monin Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index f612bc9129bd42..c0908e6d49210b 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2263,6 +2263,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */ { USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, { } /* Terminating entry */ From ff07186b4d774ac22a5345d30763045af4569416 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 11 Oct 2023 22:25:28 +0300 Subject: [PATCH 182/661] x86/efistub: Don't try to print after ExitBootService() setup_e820() is executed after UEFI's ExitBootService has been called. This causes the firmware to throw an exception because the Console IO protocol is supposed to work only during boot service environment. As per UEFI 2.9, section 12.1: "This protocol is used to handle input and output of text-based information intended for the system user during the operation of code in the boot services environment." So drop the diagnostic warning from this function. We might add back a warning that is issued later when initializing the kernel itself. Signed-off-by: Nikolay Borisov Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/x86-stub.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c index 2fee52ed335d13..3b8bccd7c21690 100644 --- a/drivers/firmware/efi/libstub/x86-stub.c +++ b/drivers/firmware/efi/libstub/x86-stub.c @@ -605,11 +605,8 @@ setup_e820(struct boot_params *params, struct setup_data *e820ext, u32 e820ext_s break; case EFI_UNACCEPTED_MEMORY: - if (!IS_ENABLED(CONFIG_UNACCEPTED_MEMORY)) { - efi_warn_once( -"The system has unaccepted memory, but kernel does not support it\nConsider enabling CONFIG_UNACCEPTED_MEMORY\n"); + if (!IS_ENABLED(CONFIG_UNACCEPTED_MEMORY)) continue; - } e820_type = E820_TYPE_RAM; process_unaccepted_memory(d->phys_addr, d->phys_addr + PAGE_SIZE * d->num_pages); From d93f3f992780af4a21e6c1ab86946b7c5602f1b9 Mon Sep 17 00:00:00 2001 From: Jiri Wiesner Date: Tue, 10 Oct 2023 18:39:33 +0200 Subject: [PATCH 183/661] bonding: Return pointer to data after pull on skb Since 429e3d123d9a ("bonding: Fix extraction of ports from the packet headers"), header offsets used to compute a hash in bond_xmit_hash() are relative to skb->data and not skb->head. If the tail of the header buffer of an skb really needs to be advanced and the operation is successful, the pointer to the data must be returned (and not a pointer to the head of the buffer). Fixes: 429e3d123d9a ("bonding: Fix extraction of ports from the packet headers") Signed-off-by: Jiri Wiesner Acked-by: Jay Vosburgh Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ed7212e61c541b..51d47eda1c873d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4023,7 +4023,7 @@ static inline const void *bond_pull_data(struct sk_buff *skb, if (likely(n <= hlen)) return data; else if (skb && likely(pskb_may_pull(skb, n))) - return skb->head; + return skb->data; return NULL; } From 0d3ad1917996839a5042d18f04e41915cfa1b74a Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sun, 24 Sep 2023 22:26:33 +0800 Subject: [PATCH 184/661] efi: fix memory leak in krealloc failure handling In the previous code, there was a memory leak issue where the previously allocated memory was not freed upon a failed krealloc operation. This patch addresses the problem by releasing the old memory before setting the pointer to NULL in case of a krealloc failure. This ensures that memory is properly managed and avoids potential memory leaks. Signed-off-by: Kuan-Wei Chiu Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 1599f11768426d..9cfac61812f687 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -273,9 +273,13 @@ static __init int efivar_ssdt_load(void) if (status == EFI_NOT_FOUND) { break; } else if (status == EFI_BUFFER_TOO_SMALL) { - name = krealloc(name, name_size, GFP_KERNEL); - if (!name) + efi_char16_t *name_tmp = + krealloc(name, name_size, GFP_KERNEL); + if (!name_tmp) { + kfree(name); return -ENOMEM; + } + name = name_tmp; continue; } From 4eaf0932c69bdc56d2c2af30404f9c918b1f6295 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Tue, 3 Oct 2023 12:02:09 +0200 Subject: [PATCH 185/661] block: Fix regression in sed-opal for a saved key. The commit 3bfeb61256643281ac4be5b8a57e9d9da3db4335 introduced the use of keyring for sed-opal. Unfortunately, there is also a possibility to save the Opal key used in opal_lock_unlock(). This patch switches the order of operation, so the cached key is used instead of failure for opal_get_key. The problem was found by the cryptsetup Opal test recently added to the cryptsetup tree. Fixes: 3bfeb6125664 ("block: sed-opal: keyring support for SED keys") Tested-by: Ondrej Kozina Signed-off-by: Milan Broz Link: https://lore.kernel.org/r/20231003100209.380037-1-gmazyland@gmail.com Signed-off-by: Jens Axboe --- block/sed-opal.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/block/sed-opal.c b/block/sed-opal.c index 6d7f25d1711ba7..04f38a3f5d9597 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -2888,12 +2888,11 @@ static int opal_lock_unlock(struct opal_dev *dev, if (lk_unlk->session.who > OPAL_USER9) return -EINVAL; - ret = opal_get_key(dev, &lk_unlk->session.opal_key); - if (ret) - return ret; mutex_lock(&dev->dev_lock); opal_lock_check_for_saved_key(dev, lk_unlk); - ret = __opal_lock_unlock(dev, lk_unlk); + ret = opal_get_key(dev, &lk_unlk->session.opal_key); + if (!ret) + ret = __opal_lock_unlock(dev, lk_unlk); mutex_unlock(&dev->dev_lock); return ret; From f88dfbf333b3661faff996bb03af2024d907b76a Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Fri, 13 Oct 2023 17:45:25 +0800 Subject: [PATCH 186/661] ASoC: rt5650: fix the wrong result of key button The RT5650 should enable a power setting for button detection to avoid the wrong result. Signed-off-by: Shuming Fan Link: https://lore.kernel.org/r/20231013094525.715518-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5645.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 1a137ca3f49647..7938b52d741d8c 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -3257,6 +3257,8 @@ int rt5645_set_jack_detect(struct snd_soc_component *component, RT5645_GP1_PIN_IRQ, RT5645_GP1_PIN_IRQ); regmap_update_bits(rt5645->regmap, RT5645_GEN_CTRL1, RT5645_DIG_GATE_CTRL, RT5645_DIG_GATE_CTRL); + regmap_update_bits(rt5645->regmap, RT5645_DEPOP_M1, + RT5645_HP_CB_MASK, RT5645_HP_CB_PU); } rt5645_irq(0, rt5645); From 4e9a429ae80657bdc502d3f5078e2073656ec5fd Mon Sep 17 00:00:00 2001 From: Roy Chateau Date: Fri, 13 Oct 2023 13:02:39 +0200 Subject: [PATCH 187/661] ASoC: codecs: tas2780: Fix log of failed reset via I2C. Correctly log failures of reset via I2C. Signed-off-by: Roy Chateau Link: https://lore.kernel.org/r/20231013110239.473123-1-roy.chateau@mep-info.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas2780.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/tas2780.c b/sound/soc/codecs/tas2780.c index 86bd6c18a94401..41076be238542b 100644 --- a/sound/soc/codecs/tas2780.c +++ b/sound/soc/codecs/tas2780.c @@ -39,7 +39,7 @@ static void tas2780_reset(struct tas2780_priv *tas2780) usleep_range(2000, 2050); } - snd_soc_component_write(tas2780->component, TAS2780_SW_RST, + ret = snd_soc_component_write(tas2780->component, TAS2780_SW_RST, TAS2780_RST); if (ret) dev_err(tas2780->dev, "%s:errCode:0x%x Reset error!\n", From 03adc61edad49e1bbecfb53f7ea5d78f398fe368 Mon Sep 17 00:00:00 2001 From: Dan Clash Date: Thu, 12 Oct 2023 14:55:18 -0700 Subject: [PATCH 188/661] audit,io_uring: io_uring openat triggers audit reference count underflow An io_uring openat operation can update an audit reference count from multiple threads resulting in the call trace below. A call to io_uring_submit() with a single openat op with a flag of IOSQE_ASYNC results in the following reference count updates. These first part of the system call performs two increments that do not race. do_syscall_64() __do_sys_io_uring_enter() io_submit_sqes() io_openat_prep() __io_openat_prep() getname() getname_flags() /* update 1 (increment) */ __audit_getname() /* update 2 (increment) */ The openat op is queued to an io_uring worker thread which starts the opportunity for a race. The system call exit performs one decrement. do_syscall_64() syscall_exit_to_user_mode() syscall_exit_to_user_mode_prepare() __audit_syscall_exit() audit_reset_context() putname() /* update 3 (decrement) */ The io_uring worker thread performs one increment and two decrements. These updates can race with the system call decrement. io_wqe_worker() io_worker_handle_work() io_wq_submit_work() io_issue_sqe() io_openat() io_openat2() do_filp_open() path_openat() __audit_inode() /* update 4 (increment) */ putname() /* update 5 (decrement) */ __audit_uring_exit() audit_reset_context() putname() /* update 6 (decrement) */ The fix is to change the refcnt member of struct audit_names from int to atomic_t. kernel BUG at fs/namei.c:262! Call Trace: ... ? putname+0x68/0x70 audit_reset_context.part.0.constprop.0+0xe1/0x300 __audit_uring_exit+0xda/0x1c0 io_issue_sqe+0x1f3/0x450 ? lock_timer_base+0x3b/0xd0 io_wq_submit_work+0x8d/0x2b0 ? __try_to_del_timer_sync+0x67/0xa0 io_worker_handle_work+0x17c/0x2b0 io_wqe_worker+0x10a/0x350 Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/MW2PR2101MB1033FFF044A258F84AEAA584F1C9A@MW2PR2101MB1033.namprd21.prod.outlook.com/ Fixes: 5bd2182d58e9 ("audit,io_uring,io-wq: add some basic audit support to io_uring") Signed-off-by: Dan Clash Link: https://lore.kernel.org/r/20231012215518.GA4048@linuxonhyperv3.guj3yctzbm1etfxqx2vob5hsef.xx.internal.cloudapp.net Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- fs/namei.c | 9 +++++---- include/linux/fs.h | 2 +- kernel/auditsc.c | 8 ++++---- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 567ee547492bc2..94565bd7e73f6f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -188,7 +188,7 @@ getname_flags(const char __user *filename, int flags, int *empty) } } - result->refcnt = 1; + atomic_set(&result->refcnt, 1); /* The empty path is special. */ if (unlikely(!len)) { if (empty) @@ -249,7 +249,7 @@ getname_kernel(const char * filename) memcpy((char *)result->name, filename, len); result->uptr = NULL; result->aname = NULL; - result->refcnt = 1; + atomic_set(&result->refcnt, 1); audit_getname(result); return result; @@ -261,9 +261,10 @@ void putname(struct filename *name) if (IS_ERR(name)) return; - BUG_ON(name->refcnt <= 0); + if (WARN_ON_ONCE(!atomic_read(&name->refcnt))) + return; - if (--name->refcnt > 0) + if (!atomic_dec_and_test(&name->refcnt)) return; if (name->name != name->iname) { diff --git a/include/linux/fs.h b/include/linux/fs.h index b528f063e8ffaa..4a40823c3c6784 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2403,7 +2403,7 @@ struct audit_names; struct filename { const char *name; /* pointer to actual string */ const __user char *uptr; /* original userland pointer */ - int refcnt; + atomic_t refcnt; struct audit_names *aname; const char iname[]; }; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 21d2fa815e782a..6f0d6fb6523fa7 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2212,7 +2212,7 @@ __audit_reusename(const __user char *uptr) if (!n->name) continue; if (n->name->uptr == uptr) { - n->name->refcnt++; + atomic_inc(&n->name->refcnt); return n->name; } } @@ -2241,7 +2241,7 @@ void __audit_getname(struct filename *name) n->name = name; n->name_len = AUDIT_NAME_FULL; name->aname = n; - name->refcnt++; + atomic_inc(&name->refcnt); } static inline int audit_copy_fcaps(struct audit_names *name, @@ -2373,7 +2373,7 @@ void __audit_inode(struct filename *name, const struct dentry *dentry, return; if (name) { n->name = name; - name->refcnt++; + atomic_inc(&name->refcnt); } out: @@ -2500,7 +2500,7 @@ void __audit_inode_child(struct inode *parent, if (found_parent) { found_child->name = found_parent->name; found_child->name_len = AUDIT_NAME_FULL; - found_child->name->refcnt++; + atomic_inc(&found_child->name->refcnt); } } From 8702cf12e6ba91616a72d684e90357977972991b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 9 Oct 2023 18:38:14 -0700 Subject: [PATCH 189/661] tcp: Fix listen() warning with v4-mapped-v6 address. syzbot reported a warning [0] introduced by commit c48ef9c4aed3 ("tcp: Fix bind() regression for v4-mapped-v6 non-wildcard address."). After the cited commit, a v4 socket's address matches the corresponding v4-mapped-v6 tb2 in inet_bind2_bucket_match_addr(), not vice versa. During X.X.X.X -> ::ffff:X.X.X.X order bind()s, the second bind() uses bhash and conflicts properly without checking bhash2 so that we need not check if a v4-mapped-v6 sk matches the corresponding v4 address tb2 in inet_bind2_bucket_match_addr(). However, the repro shows that we need to check that in a no-conflict case. The repro bind()s two sockets to the 2-tuples using SO_REUSEPORT and calls listen() for the first socket: from socket import * s1 = socket() s1.setsockopt(SOL_SOCKET, SO_REUSEPORT, 1) s1.bind(('127.0.0.1', 0)) s2 = socket(AF_INET6) s2.setsockopt(SOL_SOCKET, SO_REUSEPORT, 1) s2.bind(('::ffff:127.0.0.1', s1.getsockname()[1])) s1.listen() The second socket should belong to the first socket's tb2, but the second bind() creates another tb2 bucket because inet_bind2_bucket_find() returns NULL in inet_csk_get_port() as the v4-mapped-v6 sk does not match the corresponding v4 address tb2. bhash2[] -> tb2(::ffff:X.X.X.X) -> tb2(X.X.X.X) Then, listen() for the first socket calls inet_csk_get_port(), where the v4 address matches the v4-mapped-v6 tb2 and WARN_ON() is triggered. To avoid that, we need to check if v4-mapped-v6 sk address matches with the corresponding v4 address tb2 in inet_bind2_bucket_match(). The same checks are needed in inet_bind2_bucket_addr_match() too, so we can move all checks there and call it from inet_bind2_bucket_match(). Note that now tb->family is just an address family of tb->(v6_)?rcv_saddr and not of sockets in the bucket. This could be refactored later by defining tb->rcv_saddr as tb->v6_rcv_saddr.s6_addr32[3] and prepending ::ffff: when creating v4 tb2. [0]: WARNING: CPU: 0 PID: 5049 at net/ipv4/inet_connection_sock.c:587 inet_csk_get_port+0xf96/0x2350 net/ipv4/inet_connection_sock.c:587 Modules linked in: CPU: 0 PID: 5049 Comm: syz-executor288 Not tainted 6.6.0-rc2-syzkaller-00018-g2cf0f7156238 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/04/2023 RIP: 0010:inet_csk_get_port+0xf96/0x2350 net/ipv4/inet_connection_sock.c:587 Code: 7c 24 08 e8 4c b6 8a 01 31 d2 be 88 01 00 00 48 c7 c7 e0 94 ae 8b e8 59 2e a3 f8 2e 2e 2e 31 c0 e9 04 fe ff ff e8 ca 88 d0 f8 <0f> 0b e9 0f f9 ff ff e8 be 88 d0 f8 49 8d 7e 48 e8 65 ca 5a 00 31 RSP: 0018:ffffc90003abfbf0 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff888026429100 RCX: 0000000000000000 RDX: ffff88807edcbb80 RSI: ffffffff88b73d66 RDI: ffff888026c49f38 RBP: ffff888026c49f30 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff9260f200 R13: ffff888026c49880 R14: 0000000000000000 R15: ffff888026429100 FS: 00005555557d5380(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000045ad50 CR3: 0000000025754000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: inet_csk_listen_start+0x155/0x360 net/ipv4/inet_connection_sock.c:1256 __inet_listen_sk+0x1b8/0x5c0 net/ipv4/af_inet.c:217 inet_listen+0x93/0xd0 net/ipv4/af_inet.c:239 __sys_listen+0x194/0x270 net/socket.c:1866 __do_sys_listen net/socket.c:1875 [inline] __se_sys_listen net/socket.c:1873 [inline] __x64_sys_listen+0x53/0x80 net/socket.c:1873 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f3a5bce3af9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 c1 17 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffc1a1c79e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000032 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f3a5bce3af9 RDX: 00007f3a5bce3af9 RSI: 0000000000000000 RDI: 0000000000000003 RBP: 00007f3a5bd565f0 R08: 0000000000000006 R09: 0000000000000006 R10: 0000000000000006 R11: 0000000000000246 R12: 0000000000000001 R13: 431bde82d7b634db R14: 0000000000000001 R15: 0000000000000001 Fixes: c48ef9c4aed3 ("tcp: Fix bind() regression for v4-mapped-v6 non-wildcard address.") Reported-by: syzbot+71e724675ba3958edb31@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=71e724675ba3958edb31 Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231010013814.70571-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv4/inet_hashtables.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index c32f5e28758be7..598c1b114d2c22 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -149,8 +149,14 @@ static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2, const struct sock *sk) { #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family != tb2->family) - return false; + if (sk->sk_family != tb2->family) { + if (sk->sk_family == AF_INET) + return ipv6_addr_v4mapped(&tb2->v6_rcv_saddr) && + tb2->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr; + + return ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr) && + sk->sk_v6_rcv_saddr.s6_addr32[3] == tb2->rcv_saddr; + } if (sk->sk_family == AF_INET6) return ipv6_addr_equal(&tb2->v6_rcv_saddr, @@ -819,19 +825,7 @@ static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb, tb->l3mdev != l3mdev) return false; -#if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family != tb->family) { - if (sk->sk_family == AF_INET) - return ipv6_addr_v4mapped(&tb->v6_rcv_saddr) && - tb->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr; - - return false; - } - - if (sk->sk_family == AF_INET6) - return ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr); -#endif - return tb->rcv_saddr == sk->sk_rcv_saddr; + return inet_bind2_bucket_addr_match(tb, sk); } bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const struct net *net, From 9c97790a07dc4f9bdc6e1701003dc9b86f749c71 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 13 Oct 2023 18:37:33 +0100 Subject: [PATCH 190/661] ASoC: dwc: Fix non-DT instantiation Commit d6d6c513f5d2 ("ASoC: dwc: Use ops to get platform data") converted the DesignWare I2S driver to use a DT specific function to obtain platform data but this breaks at least non-DT systems such as AMD. Revert it. Fixes: d6d6c513f5d2 ("ASoC: dwc: Use ops to get platform data") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20231013-asoc-fix-dwc-v1-1-63211bb746b9@kernel.org Signed-off-by: Mark Brown --- sound/soc/dwc/dwc-i2s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/dwc/dwc-i2s.c b/sound/soc/dwc/dwc-i2s.c index 22c004179214d2..9ea4be56d3b709 100644 --- a/sound/soc/dwc/dwc-i2s.c +++ b/sound/soc/dwc/dwc-i2s.c @@ -917,7 +917,7 @@ static int jh7110_i2stx0_clk_cfg(struct i2s_clk_config_data *config) static int dw_i2s_probe(struct platform_device *pdev) { - const struct i2s_platform_data *pdata = of_device_get_match_data(&pdev->dev); + const struct i2s_platform_data *pdata = pdev->dev.platform_data; struct dw_i2s_dev *dev; struct resource *res; int ret, irq; From 8d6b3ea4d9eaca80982442b68a292ce50ce0a135 Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Thu, 14 Sep 2023 18:10:18 -0600 Subject: [PATCH 191/661] iio: adc: xilinx-xadc: Don't clobber preset voltage/temperature thresholds In the probe function, the driver was reading out the thresholds already set in the core, which can be configured by the user in the Vivado tools when the FPGA image is built. However, it later clobbered those values with zero or maximum values. In particular, the overtemperature shutdown threshold register was overwritten with the max value, which effectively prevents the FPGA from shutting down when the desired threshold was eached, potentially risking hardware damage in that case. Remove this code to leave the preconfigured default threshold values intact. The code was also disabling all alarms regardless of what enable state they were left in by the FPGA image, including the overtemperature shutdown feature. Leave these bits in their original state so they are not unconditionally disabled. Fixes: bdc8cda1d010 ("iio:adc: Add Xilinx XADC driver") Signed-off-by: Robert Hancock Acked-by: O'Griofa, Conall Tested-by: O'Griofa, Conall Link: https://lore.kernel.org/r/20230915001019.2862964-2-robert.hancock@calian.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/xilinx-xadc-core.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index dba73300f89482..d4d0d184a172f1 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -1423,28 +1423,6 @@ static int xadc_probe(struct platform_device *pdev) if (ret) return ret; - /* Disable all alarms */ - ret = xadc_update_adc_reg(xadc, XADC_REG_CONF1, XADC_CONF1_ALARM_MASK, - XADC_CONF1_ALARM_MASK); - if (ret) - return ret; - - /* Set thresholds to min/max */ - for (i = 0; i < 16; i++) { - /* - * Set max voltage threshold and both temperature thresholds to - * 0xffff, min voltage threshold to 0. - */ - if (i % 8 < 4 || i == 7) - xadc->threshold[i] = 0xffff; - else - xadc->threshold[i] = 0; - ret = xadc_write_adc_reg(xadc, XADC_REG_THRESHOLD(i), - xadc->threshold[i]); - if (ret) - return ret; - } - /* Go to non-buffered mode */ xadc_postdisable(indio_dev); From e2bd8c28b9bd835077eb65715d416d667694a80d Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Thu, 14 Sep 2023 18:10:19 -0600 Subject: [PATCH 192/661] iio: adc: xilinx-xadc: Correct temperature offset/scale for UltraScale The driver was previously using offset and scale values for the temperature sensor readings which were only valid for 7-series devices. Add per-device-type values for offset and scale and set them appropriately for each device type. Note that the values used for the UltraScale family are for UltraScale+ (i.e. the SYSMONE4 primitive) using the internal reference, as that seems to be the most common configuration and the device tree values Xilinx's device tree generator produces don't seem to give us anything to tell us which configuration is used. However, the differences within the UltraScale family seem fairly minor and it's closer than using the 7-series values instead in any case. Fixes: c2b7720a7905 ("iio: xilinx-xadc: Add basic support for Ultrascale System Monitor") Signed-off-by: Robert Hancock Acked-by: O'Griofa, Conall Tested-by: O'Griofa, Conall Link: https://lore.kernel.org/r/20230915001019.2862964-3-robert.hancock@calian.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/xilinx-xadc-core.c | 17 ++++++++++++++--- drivers/iio/adc/xilinx-xadc.h | 2 ++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/iio/adc/xilinx-xadc-core.c b/drivers/iio/adc/xilinx-xadc-core.c index d4d0d184a172f1..564c0cad0fc797 100644 --- a/drivers/iio/adc/xilinx-xadc-core.c +++ b/drivers/iio/adc/xilinx-xadc-core.c @@ -456,6 +456,9 @@ static const struct xadc_ops xadc_zynq_ops = { .interrupt_handler = xadc_zynq_interrupt_handler, .update_alarm = xadc_zynq_update_alarm, .type = XADC_TYPE_S7, + /* Temp in C = (val * 503.975) / 2**bits - 273.15 */ + .temp_scale = 503975, + .temp_offset = 273150, }; static const unsigned int xadc_axi_reg_offsets[] = { @@ -566,6 +569,9 @@ static const struct xadc_ops xadc_7s_axi_ops = { .interrupt_handler = xadc_axi_interrupt_handler, .flags = XADC_FLAGS_BUFFERED | XADC_FLAGS_IRQ_OPTIONAL, .type = XADC_TYPE_S7, + /* Temp in C = (val * 503.975) / 2**bits - 273.15 */ + .temp_scale = 503975, + .temp_offset = 273150, }; static const struct xadc_ops xadc_us_axi_ops = { @@ -577,6 +583,12 @@ static const struct xadc_ops xadc_us_axi_ops = { .interrupt_handler = xadc_axi_interrupt_handler, .flags = XADC_FLAGS_BUFFERED | XADC_FLAGS_IRQ_OPTIONAL, .type = XADC_TYPE_US, + /** + * Values below are for UltraScale+ (SYSMONE4) using internal reference. + * See https://docs.xilinx.com/v/u/en-US/ug580-ultrascale-sysmon + */ + .temp_scale = 509314, + .temp_offset = 280231, }; static int _xadc_update_adc_reg(struct xadc *xadc, unsigned int reg, @@ -945,8 +957,7 @@ static int xadc_read_raw(struct iio_dev *indio_dev, *val2 = bits; return IIO_VAL_FRACTIONAL_LOG2; case IIO_TEMP: - /* Temp in C = (val * 503.975) / 2**bits - 273.15 */ - *val = 503975; + *val = xadc->ops->temp_scale; *val2 = bits; return IIO_VAL_FRACTIONAL_LOG2; default: @@ -954,7 +965,7 @@ static int xadc_read_raw(struct iio_dev *indio_dev, } case IIO_CHAN_INFO_OFFSET: /* Only the temperature channel has an offset */ - *val = -((273150 << bits) / 503975); + *val = -((xadc->ops->temp_offset << bits) / xadc->ops->temp_scale); return IIO_VAL_INT; case IIO_CHAN_INFO_SAMP_FREQ: ret = xadc_read_samplerate(xadc); diff --git a/drivers/iio/adc/xilinx-xadc.h b/drivers/iio/adc/xilinx-xadc.h index 7d78ce6989671a..3036f4d613ff5d 100644 --- a/drivers/iio/adc/xilinx-xadc.h +++ b/drivers/iio/adc/xilinx-xadc.h @@ -85,6 +85,8 @@ struct xadc_ops { unsigned int flags; enum xadc_type type; + int temp_scale; + int temp_offset; }; static inline int _xadc_read_adc_reg(struct xadc *xadc, unsigned int reg, From dbb13378ba306484214b84304e232723a2aaf10a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 10 Oct 2023 16:21:12 +0300 Subject: [PATCH 193/661] selftests: fib_tests: Disable RP filter in multipath list receive test The test relies on the fib:fib_table_lookup trace point being triggered once for each forwarded packet. If RP filter is not disabled, the trace point will be triggered twice for each packet (for source validation and forwarding), potentially masking actual bugs. Fix by explicitly disabling RP filter. Before: # ./fib_tests.sh -t ipv4_mpath_list IPv4 multipath list receive tests TEST: Multipath route hit ratio (1.99) [ OK ] After: # ./fib_tests.sh -t ipv4_mpath_list IPv4 multipath list receive tests TEST: Multipath route hit ratio (.99) [ OK ] Fixes: 8ae9efb859c0 ("selftests: fib_tests: Add multipath list receive tests") Reported-by: kernel test robot Closes: https://lore.kernel.org/netdev/202309191658.c00d8b8-oliver.sang@intel.com/ Tested-by: kernel test robot Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Tested-by: Sriram Yagnaraman Link: https://lore.kernel.org/r/20231010132113.3014691-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_tests.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index e7d2a530618a16..0dbb26b4fa4a15 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -2437,6 +2437,9 @@ ipv4_mpath_list_test() run_cmd "ip -n ns2 route add 203.0.113.0/24 nexthop via 172.16.201.2 nexthop via 172.16.202.2" run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1" + run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0" + run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0" + run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0" set +e local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]') From aa13e5241a8fa32b26d5a6b8b63d04c6357243f4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 10 Oct 2023 16:21:13 +0300 Subject: [PATCH 194/661] selftests: fib_tests: Count all trace point invocations The tests rely on the IPv{4,6} FIB trace points being triggered once for each forwarded packet. If receive processing is deferred to the ksoftirqd task these invocations will not be counted and the tests will fail. Fix by specifying the '-a' flag to avoid perf from filtering on the mausezahn task. Before: # ./fib_tests.sh -t ipv4_mpath_list IPv4 multipath list receive tests TEST: Multipath route hit ratio (.68) [FAIL] # ./fib_tests.sh -t ipv6_mpath_list IPv6 multipath list receive tests TEST: Multipath route hit ratio (.27) [FAIL] After: # ./fib_tests.sh -t ipv4_mpath_list IPv4 multipath list receive tests TEST: Multipath route hit ratio (1.00) [ OK ] # ./fib_tests.sh -t ipv6_mpath_list IPv6 multipath list receive tests TEST: Multipath route hit ratio (.99) [ OK ] Fixes: 8ae9efb859c0 ("selftests: fib_tests: Add multipath list receive tests") Reported-by: kernel test robot Closes: https://lore.kernel.org/netdev/202309191658.c00d8b8-oliver.sang@intel.com/ Tested-by: kernel test robot Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Tested-by: Sriram Yagnaraman Link: https://lore.kernel.org/r/20231010132113.3014691-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_tests.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 0dbb26b4fa4a15..66d0db7a2614d3 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -2452,7 +2452,7 @@ ipv4_mpath_list_test() # words, the FIB lookup tracepoint needs to be triggered for every # packet. local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets) - run_cmd "perf stat -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" + run_cmd "perf stat -a -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets) local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l) list_rcv_eval $tmp_file $diff @@ -2497,7 +2497,7 @@ ipv6_mpath_list_test() # words, the FIB lookup tracepoint needs to be triggered for every # packet. local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets) - run_cmd "perf stat -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" + run_cmd "perf stat -a -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets) local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l) list_rcv_eval $tmp_file $diff From 61b40cefe51af005c72dbdcf975a3d166c6e6406 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Wed, 11 Oct 2023 11:24:19 +0800 Subject: [PATCH 195/661] net: dsa: bcm_sf2: Fix possible memory leak in bcm_sf2_mdio_register() In bcm_sf2_mdio_register(), the class_find_device() will call get_device() to increment reference count for priv->master_mii_bus->dev if of_mdio_find_bus() succeeds. If mdiobus_alloc() or mdiobus_register() fails, it will call get_device() twice without decrement reference count for the device. And it is the same if bcm_sf2_mdio_register() succeeds but fails in bcm_sf2_sw_probe(), or if bcm_sf2_sw_probe() succeeds. If the reference count has not decremented to zero, the dev related resource will not be freed. So remove the get_device() in bcm_sf2_mdio_register(), and call put_device() if mdiobus_alloc() or mdiobus_register() fails and in bcm_sf2_mdio_unregister() to solve the issue. And as Simon suggested, unwind from errors for bcm_sf2_mdio_register() and just return 0 if it succeeds to make it cleaner. Fixes: 461cd1b03e32 ("net: dsa: bcm_sf2: Register our slave MDIO bus") Signed-off-by: Jinjie Ruan Suggested-by: Simon Horman Reviewed-by: Simon Horman Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20231011032419.2423290-1-ruanjinjie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/bcm_sf2.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 72374b066f64a9..cd1f240c90f396 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -617,17 +617,16 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) dn = of_find_compatible_node(NULL, NULL, "brcm,unimac-mdio"); priv->master_mii_bus = of_mdio_find_bus(dn); if (!priv->master_mii_bus) { - of_node_put(dn); - return -EPROBE_DEFER; + err = -EPROBE_DEFER; + goto err_of_node_put; } - get_device(&priv->master_mii_bus->dev); priv->master_mii_dn = dn; priv->slave_mii_bus = mdiobus_alloc(); if (!priv->slave_mii_bus) { - of_node_put(dn); - return -ENOMEM; + err = -ENOMEM; + goto err_put_master_mii_bus_dev; } priv->slave_mii_bus->priv = priv; @@ -684,11 +683,17 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) } err = mdiobus_register(priv->slave_mii_bus); - if (err && dn) { - mdiobus_free(priv->slave_mii_bus); - of_node_put(dn); - } + if (err && dn) + goto err_free_slave_mii_bus; + return 0; + +err_free_slave_mii_bus: + mdiobus_free(priv->slave_mii_bus); +err_put_master_mii_bus_dev: + put_device(&priv->master_mii_bus->dev); +err_of_node_put: + of_node_put(dn); return err; } @@ -696,6 +701,7 @@ static void bcm_sf2_mdio_unregister(struct bcm_sf2_priv *priv) { mdiobus_unregister(priv->slave_mii_bus); mdiobus_free(priv->slave_mii_bus); + put_device(&priv->master_mii_bus->dev); of_node_put(priv->master_mii_dn); } From 242e34500a32631f85c2b4eb6cb42a368a39e54f Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 10 Oct 2023 13:30:59 -0700 Subject: [PATCH 196/661] ice: fix over-shifted variable Since the introduction of the ice driver the code has been double-shifting the RSS enabling field, because the define already has shifts in it and can't have the regular pattern of "a << shiftval & mask" applied. Most places in the code got it right, but one line was still wrong. Fix this one location for easy backports to stable. An in-progress patch fixes the defines to "standard" and will be applied as part of the regular -next process sometime after this one. Fixes: d76a60ba7afb ("ice: Add support for VLANs and offloads") Reviewed-by: Przemek Kitszel CC: stable@vger.kernel.org Signed-off-by: Jesse Brandeburg Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Jacob Keller Link: https://lore.kernel.org/r/20231010203101.406248-1-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_lib.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 7bf9b70697542d..73bbf06a76db9d 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1201,8 +1201,7 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) ctxt->info.q_opt_rss = ((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) & ICE_AQ_VSI_Q_OPT_RSS_LUT_M) | - ((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) & - ICE_AQ_VSI_Q_OPT_RSS_HASH_M); + (hash_type & ICE_AQ_VSI_Q_OPT_RSS_HASH_M); } static void From 419ce133ab928ab5efd7b50b2ef36ddfd4eadbd2 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 11 Oct 2023 09:20:55 +0200 Subject: [PATCH 197/661] tcp: allow again tcp_disconnect() when threads are waiting As reported by Tom, .NET and applications build on top of it rely on connect(AF_UNSPEC) to async cancel pending I/O operations on TCP socket. The blamed commit below caused a regression, as such cancellation can now fail. As suggested by Eric, this change addresses the problem explicitly causing blocking I/O operation to terminate immediately (with an error) when a concurrent disconnect() is executed. Instead of tracking the number of threads blocked on a given socket, track the number of disconnect() issued on such socket. If such counter changes after a blocking operation releasing and re-acquiring the socket lock, error out the current operation. Fixes: 4faeee0cf8a5 ("tcp: deny tcp_disconnect() when threads are waiting") Reported-by: Tom Deseyn Closes: https://bugzilla.redhat.com/show_bug.cgi?id=1886305 Suggested-by: Eric Dumazet Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/f3b95e47e3dbed840960548aebaa8d954372db41.1697008693.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- .../chelsio/inline_crypto/chtls/chtls_io.c | 36 +++++++++++++++---- include/net/sock.h | 10 +++--- net/core/stream.c | 12 ++++--- net/ipv4/af_inet.c | 10 ++++-- net/ipv4/inet_connection_sock.c | 1 - net/ipv4/tcp.c | 16 ++++----- net/ipv4/tcp_bpf.c | 4 +++ net/mptcp/protocol.c | 7 ---- net/tls/tls_main.c | 10 ++++-- net/tls/tls_sw.c | 19 ++++++---- 10 files changed, 80 insertions(+), 45 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c index 5fc64e47568a9c..d567e42e176011 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c @@ -911,7 +911,7 @@ static int csk_wait_memory(struct chtls_dev *cdev, struct sock *sk, long *timeo_p) { DEFINE_WAIT_FUNC(wait, woken_wake_function); - int err = 0; + int ret, err = 0; long current_timeo; long vm_wait = 0; bool noblock; @@ -942,10 +942,13 @@ static int csk_wait_memory(struct chtls_dev *cdev, set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; - sk_wait_event(sk, ¤t_timeo, sk->sk_err || - (sk->sk_shutdown & SEND_SHUTDOWN) || - (csk_mem_free(cdev, sk) && !vm_wait), &wait); + ret = sk_wait_event(sk, ¤t_timeo, sk->sk_err || + (sk->sk_shutdown & SEND_SHUTDOWN) || + (csk_mem_free(cdev, sk) && !vm_wait), + &wait); sk->sk_write_pending--; + if (ret < 0) + goto do_error; if (vm_wait) { vm_wait -= current_timeo; @@ -1348,6 +1351,7 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int copied = 0; int target; long timeo; + int ret; buffers_freed = 0; @@ -1423,7 +1427,11 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (copied >= target) break; chtls_cleanup_rbuf(sk, copied); - sk_wait_data(sk, &timeo, NULL); + ret = sk_wait_data(sk, &timeo, NULL); + if (ret < 0) { + copied = copied ? : ret; + goto unlock; + } continue; found_ok_skb: if (!skb->len) { @@ -1518,6 +1526,8 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (buffers_freed) chtls_cleanup_rbuf(sk, copied); + +unlock: release_sock(sk); return copied; } @@ -1534,6 +1544,7 @@ static int peekmsg(struct sock *sk, struct msghdr *msg, int copied = 0; size_t avail; /* amount of available data in current skb */ long timeo; + int ret; lock_sock(sk); timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); @@ -1585,7 +1596,12 @@ static int peekmsg(struct sock *sk, struct msghdr *msg, release_sock(sk); lock_sock(sk); } else { - sk_wait_data(sk, &timeo, NULL); + ret = sk_wait_data(sk, &timeo, NULL); + if (ret < 0) { + /* here 'copied' is 0 due to previous checks */ + copied = ret; + break; + } } if (unlikely(peek_seq != tp->copied_seq)) { @@ -1656,6 +1672,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int copied = 0; long timeo; int target; /* Read at least this many bytes */ + int ret; buffers_freed = 0; @@ -1747,7 +1764,11 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (copied >= target) break; chtls_cleanup_rbuf(sk, copied); - sk_wait_data(sk, &timeo, NULL); + ret = sk_wait_data(sk, &timeo, NULL); + if (ret < 0) { + copied = copied ? : ret; + goto unlock; + } continue; found_ok_skb: @@ -1816,6 +1837,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (buffers_freed) chtls_cleanup_rbuf(sk, copied); +unlock: release_sock(sk); return copied; } diff --git a/include/net/sock.h b/include/net/sock.h index b770261fbdaf59..92f7ea62a9159c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -336,7 +336,7 @@ struct sk_filter; * @sk_cgrp_data: cgroup data for this cgroup * @sk_memcg: this socket's memory cgroup association * @sk_write_pending: a write to stream socket waits to start - * @sk_wait_pending: number of threads blocked on this socket + * @sk_disconnects: number of disconnect operations performed on this sock * @sk_state_change: callback to indicate change in the state of the sock * @sk_data_ready: callback to indicate there is data to be processed * @sk_write_space: callback to indicate there is bf sending space available @@ -429,7 +429,7 @@ struct sock { unsigned int sk_napi_id; #endif int sk_rcvbuf; - int sk_wait_pending; + int sk_disconnects; struct sk_filter __rcu *sk_filter; union { @@ -1189,8 +1189,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) } #define sk_wait_event(__sk, __timeo, __condition, __wait) \ - ({ int __rc; \ - __sk->sk_wait_pending++; \ + ({ int __rc, __dis = __sk->sk_disconnects; \ release_sock(__sk); \ __rc = __condition; \ if (!__rc) { \ @@ -1200,8 +1199,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) } \ sched_annotate_sleep(); \ lock_sock(__sk); \ - __sk->sk_wait_pending--; \ - __rc = __condition; \ + __rc = __dis == __sk->sk_disconnects ? __condition : -EPIPE; \ __rc; \ }) diff --git a/net/core/stream.c b/net/core/stream.c index f5c4e47df16505..96fbcb9bbb30a5 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -117,7 +117,7 @@ EXPORT_SYMBOL(sk_stream_wait_close); */ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) { - int err = 0; + int ret, err = 0; long vm_wait = 0; long current_timeo = *timeo_p; DEFINE_WAIT_FUNC(wait, woken_wake_function); @@ -142,11 +142,13 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; - sk_wait_event(sk, ¤t_timeo, READ_ONCE(sk->sk_err) || - (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) || - (sk_stream_memory_free(sk) && - !vm_wait), &wait); + ret = sk_wait_event(sk, ¤t_timeo, READ_ONCE(sk->sk_err) || + (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) || + (sk_stream_memory_free(sk) && !vm_wait), + &wait); sk->sk_write_pending--; + if (ret < 0) + goto do_error; if (vm_wait) { vm_wait -= current_timeo; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 3d2e30e2047356..2713c9b06c4c0f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -597,7 +597,6 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) add_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending += writebias; - sk->sk_wait_pending++; /* Basic assumption: if someone sets sk->sk_err, he _must_ * change state of the socket from TCP_SYN_*. @@ -613,7 +612,6 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) } remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending -= writebias; - sk->sk_wait_pending--; return timeo; } @@ -642,6 +640,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, return -EINVAL; if (uaddr->sa_family == AF_UNSPEC) { + sk->sk_disconnects++; err = sk->sk_prot->disconnect(sk, flags); sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; goto out; @@ -696,6 +695,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int writebias = (sk->sk_protocol == IPPROTO_TCP) && tcp_sk(sk)->fastopen_req && tcp_sk(sk)->fastopen_req->data ? 1 : 0; + int dis = sk->sk_disconnects; /* Error code is set above */ if (!timeo || !inet_wait_for_connect(sk, timeo, writebias)) @@ -704,6 +704,11 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, err = sock_intr_errno(timeo); if (signal_pending(current)) goto out; + + if (dis != sk->sk_disconnects) { + err = -EPIPE; + goto out; + } } /* Connection was closed by RST, timeout, ICMP error @@ -725,6 +730,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, sock_error: err = sock_error(sk) ? : -ECONNABORTED; sock->state = SS_UNCONNECTED; + sk->sk_disconnects++; if (sk->sk_prot->disconnect(sk, flags)) sock->state = SS_DISCONNECTING; goto out; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index aeebe881668996..394a498c282322 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1145,7 +1145,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, if (newsk) { struct inet_connection_sock *newicsk = inet_csk(newsk); - newsk->sk_wait_pending = 0; inet_sk_set_state(newsk, TCP_SYN_RECV); newicsk->icsk_bind_hash = NULL; newicsk->icsk_bind2_hash = NULL; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3f66cdeef7decb..d3456cf840de35 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -831,7 +831,9 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, */ if (!skb_queue_empty(&sk->sk_receive_queue)) break; - sk_wait_data(sk, &timeo, NULL); + ret = sk_wait_data(sk, &timeo, NULL); + if (ret < 0) + break; if (signal_pending(current)) { ret = sock_intr_errno(timeo); break; @@ -2442,7 +2444,11 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, __sk_flush_backlog(sk); } else { tcp_cleanup_rbuf(sk, copied); - sk_wait_data(sk, &timeo, last); + err = sk_wait_data(sk, &timeo, last); + if (err < 0) { + err = copied ? : err; + goto out; + } } if ((flags & MSG_PEEK) && @@ -2966,12 +2972,6 @@ int tcp_disconnect(struct sock *sk, int flags) int old_state = sk->sk_state; u32 seq; - /* Deny disconnect if other threads are blocked in sk_wait_event() - * or inet_wait_for_connect(). - */ - if (sk->sk_wait_pending) - return -EBUSY; - if (old_state != TCP_CLOSE) tcp_set_state(sk, TCP_CLOSE); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 32726820300156..ba2e9218812487 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -307,6 +307,8 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk, } data = tcp_msg_wait_data(sk, psock, timeo); + if (data < 0) + return data; if (data && !sk_psock_queue_empty(psock)) goto msg_bytes_ready; copied = -EAGAIN; @@ -351,6 +353,8 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); data = tcp_msg_wait_data(sk, psock, timeo); + if (data < 0) + return data; if (data) { if (!sk_psock_queue_empty(psock)) goto msg_bytes_ready; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c3b83cb390d9d8..d1902373c9745c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3098,12 +3098,6 @@ static int mptcp_disconnect(struct sock *sk, int flags) { struct mptcp_sock *msk = mptcp_sk(sk); - /* Deny disconnect if other threads are blocked in sk_wait_event() - * or inet_wait_for_connect(). - */ - if (sk->sk_wait_pending) - return -EBUSY; - /* We are on the fastopen error path. We can't call straight into the * subflows cleanup code due to lock nesting (we are already under * msk->firstsocket lock). @@ -3173,7 +3167,6 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk); #endif - nsk->sk_wait_pending = 0; __mptcp_init_sock(nsk); msk = mptcp_sk(nsk); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 02f583ff923953..002483e60c190d 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -139,8 +139,8 @@ void update_sk_prot(struct sock *sk, struct tls_context *ctx) int wait_on_pending_writer(struct sock *sk, long *timeo) { - int rc = 0; DEFINE_WAIT_FUNC(wait, woken_wake_function); + int ret, rc = 0; add_wait_queue(sk_sleep(sk), &wait); while (1) { @@ -154,9 +154,13 @@ int wait_on_pending_writer(struct sock *sk, long *timeo) break; } - if (sk_wait_event(sk, timeo, - !READ_ONCE(sk->sk_write_pending), &wait)) + ret = sk_wait_event(sk, timeo, + !READ_ONCE(sk->sk_write_pending), &wait); + if (ret) { + if (ret < 0) + rc = ret; break; + } } remove_wait_queue(sk_sleep(sk), &wait); return rc; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index d1fc295b83b59a..e9d1e83a859d1f 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1291,6 +1291,7 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); DEFINE_WAIT_FUNC(wait, woken_wake_function); + int ret = 0; long timeo; timeo = sock_rcvtimeo(sk, nonblock); @@ -1302,6 +1303,9 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, if (sk->sk_err) return sock_error(sk); + if (ret < 0) + return ret; + if (!skb_queue_empty(&sk->sk_receive_queue)) { tls_strp_check_rcv(&ctx->strp); if (tls_strp_msg_ready(ctx)) @@ -1320,10 +1324,10 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, released = true; add_wait_queue(sk_sleep(sk), &wait); sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - sk_wait_event(sk, &timeo, - tls_strp_msg_ready(ctx) || - !sk_psock_queue_empty(psock), - &wait); + ret = sk_wait_event(sk, &timeo, + tls_strp_msg_ready(ctx) || + !sk_psock_queue_empty(psock), + &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); remove_wait_queue(sk_sleep(sk), &wait); @@ -1852,6 +1856,7 @@ static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx, bool nonblock) { long timeo; + int ret; timeo = sock_rcvtimeo(sk, nonblock); @@ -1861,14 +1866,16 @@ static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx, ctx->reader_contended = 1; add_wait_queue(&ctx->wq, &wait); - sk_wait_event(sk, &timeo, - !READ_ONCE(ctx->reader_present), &wait); + ret = sk_wait_event(sk, &timeo, + !READ_ONCE(ctx->reader_present), &wait); remove_wait_queue(&ctx->wq, &wait); if (timeo <= 0) return -EAGAIN; if (signal_pending(current)) return sock_intr_errno(timeo); + if (ret < 0) + return ret; } WRITE_ONCE(ctx->reader_present, 1); From c68681ae46eaaa1640b52fe366d21a93b2185df5 Mon Sep 17 00:00:00 2001 From: Albert Huang Date: Wed, 11 Oct 2023 15:48:51 +0800 Subject: [PATCH 198/661] net/smc: fix smc clc failed issue when netdevice not in init_net If the netdevice is within a container and communicates externally through network technologies such as VxLAN, we won't be able to find routing information in the init_net namespace. To address this issue, we need to add a struct net parameter to the smc_ib_find_route function. This allow us to locate the routing information within the corresponding net namespace, ensuring the correct completion of the SMC CLC interaction. Fixes: e5c4744cfb59 ("net/smc: add SMC-Rv2 connection establishment") Signed-off-by: Albert Huang Reviewed-by: Dust Li Reviewed-by: Wenjia Zhang Link: https://lore.kernel.org/r/20231011074851.95280-1-huangjie.albert@bytedance.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 3 ++- net/smc/smc_ib.c | 7 ++++--- net/smc/smc_ib.h | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index bacdd971615e43..7a874da90c7fc3 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1201,6 +1201,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc, (struct smc_clc_msg_accept_confirm_v2 *)aclc; struct smc_clc_first_contact_ext *fce = smc_get_clc_first_contact_ext(clc_v2, false); + struct net *net = sock_net(&smc->sk); int rc; if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1) @@ -1210,7 +1211,7 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc, memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN); ini->smcrv2.uses_gateway = false; } else { - if (smc_ib_find_route(smc->clcsock->sk->sk_rcv_saddr, + if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr, smc_ib_gid_to_ipv4(aclc->r0.lcl.gid), ini->smcrv2.nexthop_mac, &ini->smcrv2.uses_gateway)) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 9b66d6aeeb1ae4..89981dbe46c946 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -193,7 +193,7 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport) return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE; } -int smc_ib_find_route(__be32 saddr, __be32 daddr, +int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr, u8 nexthop_mac[], u8 *uses_gateway) { struct neighbour *neigh = NULL; @@ -205,7 +205,7 @@ int smc_ib_find_route(__be32 saddr, __be32 daddr, if (daddr == cpu_to_be32(INADDR_NONE)) goto out; - rt = ip_route_output_flow(&init_net, &fl4, NULL); + rt = ip_route_output_flow(net, &fl4, NULL); if (IS_ERR(rt)) goto out; if (rt->rt_uses_gateway && rt->rt_gw_family != AF_INET) @@ -235,6 +235,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev, if (smcrv2 && attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP && smc_ib_gid_to_ipv4((u8 *)&attr->gid) != cpu_to_be32(INADDR_NONE)) { struct in_device *in_dev = __in_dev_get_rcu(ndev); + struct net *net = dev_net(ndev); const struct in_ifaddr *ifa; bool subnet_match = false; @@ -248,7 +249,7 @@ static int smc_ib_determine_gid_rcu(const struct net_device *ndev, } if (!subnet_match) goto out; - if (smcrv2->daddr && smc_ib_find_route(smcrv2->saddr, + if (smcrv2->daddr && smc_ib_find_route(net, smcrv2->saddr, smcrv2->daddr, smcrv2->nexthop_mac, &smcrv2->uses_gateway)) diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 4df5f8c8a0a1c7..ef8ac2b7546df5 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -112,7 +112,7 @@ void smc_ib_sync_sg_for_device(struct smc_link *lnk, int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, unsigned short vlan_id, u8 gid[], u8 *sgid_index, struct smc_init_info_smcrv2 *smcrv2); -int smc_ib_find_route(__be32 saddr, __be32 daddr, +int smc_ib_find_route(struct net *net, __be32 saddr, __be32 daddr, u8 nexthop_mac[], u8 *uses_gateway); bool smc_ib_is_valid_local_systemid(void); int smcr_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb); From 0f4d44f6ee048818abf80c69b6bc48927c178abe Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 12 Oct 2023 13:58:11 +0200 Subject: [PATCH 199/661] netlink: specs: devlink: fix reply command values Make sure that the command values used for replies are correct. This is only affecting generated userspace helpers, no change on kernel code. Fixes: 7199c86247e9 ("netlink: specs: devlink: add commands that do per-instance dump") Signed-off-by: Jiri Pirko Link: https://lore.kernel.org/r/20231012115811.298129-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/devlink.yaml | 18 ++++---- tools/net/ynl/generated/devlink-user.c | 54 ++++++++++++------------ 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index d1ebcd92714973..065661acb878f9 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -323,7 +323,7 @@ operations: - dev-name - sb-index reply: &sb-get-reply - value: 11 + value: 13 attributes: *sb-id-attrs dump: request: @@ -350,7 +350,7 @@ operations: - sb-index - sb-pool-index reply: &sb-pool-get-reply - value: 15 + value: 17 attributes: *sb-pool-id-attrs dump: request: @@ -378,7 +378,7 @@ operations: - sb-index - sb-pool-index reply: &sb-port-pool-get-reply - value: 19 + value: 21 attributes: *sb-port-pool-id-attrs dump: request: @@ -407,7 +407,7 @@ operations: - sb-pool-type - sb-tc-index reply: &sb-tc-pool-bind-get-reply - value: 23 + value: 25 attributes: *sb-tc-pool-bind-id-attrs dump: request: @@ -538,7 +538,7 @@ operations: - dev-name - trap-name reply: &trap-get-reply - value: 61 + value: 63 attributes: *trap-id-attrs dump: request: @@ -564,7 +564,7 @@ operations: - dev-name - trap-group-name reply: &trap-group-get-reply - value: 65 + value: 67 attributes: *trap-group-id-attrs dump: request: @@ -590,7 +590,7 @@ operations: - dev-name - trap-policer-id reply: &trap-policer-get-reply - value: 69 + value: 71 attributes: *trap-policer-id-attrs dump: request: @@ -617,7 +617,7 @@ operations: - port-index - rate-node-name reply: &rate-get-reply - value: 74 + value: 76 attributes: *rate-id-attrs dump: request: @@ -643,7 +643,7 @@ operations: - dev-name - linecard-index reply: &linecard-get-reply - value: 78 + value: 80 attributes: *linecard-id-attrs dump: request: diff --git a/tools/net/ynl/generated/devlink-user.c b/tools/net/ynl/generated/devlink-user.c index 3a8d8499fab6e6..2cb2518500cbb7 100644 --- a/tools/net/ynl/generated/devlink-user.c +++ b/tools/net/ynl/generated/devlink-user.c @@ -16,19 +16,19 @@ static const char * const devlink_op_strmap[] = { [3] = "get", [7] = "port-get", - [DEVLINK_CMD_SB_GET] = "sb-get", - [DEVLINK_CMD_SB_POOL_GET] = "sb-pool-get", - [DEVLINK_CMD_SB_PORT_POOL_GET] = "sb-port-pool-get", - [DEVLINK_CMD_SB_TC_POOL_BIND_GET] = "sb-tc-pool-bind-get", + [13] = "sb-get", + [17] = "sb-pool-get", + [21] = "sb-port-pool-get", + [25] = "sb-tc-pool-bind-get", [DEVLINK_CMD_PARAM_GET] = "param-get", [DEVLINK_CMD_REGION_GET] = "region-get", [DEVLINK_CMD_INFO_GET] = "info-get", [DEVLINK_CMD_HEALTH_REPORTER_GET] = "health-reporter-get", - [DEVLINK_CMD_TRAP_GET] = "trap-get", - [DEVLINK_CMD_TRAP_GROUP_GET] = "trap-group-get", - [DEVLINK_CMD_TRAP_POLICER_GET] = "trap-policer-get", - [DEVLINK_CMD_RATE_GET] = "rate-get", - [DEVLINK_CMD_LINECARD_GET] = "linecard-get", + [63] = "trap-get", + [67] = "trap-group-get", + [71] = "trap-policer-get", + [76] = "rate-get", + [80] = "linecard-get", [DEVLINK_CMD_SELFTESTS_GET] = "selftests-get", }; @@ -838,7 +838,7 @@ devlink_sb_get(struct ynl_sock *ys, struct devlink_sb_get_req *req) rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_sb_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_SB_GET; + yrs.rsp_cmd = 13; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -876,7 +876,7 @@ devlink_sb_get_dump(struct ynl_sock *ys, struct devlink_sb_get_req_dump *req) yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_sb_get_list); yds.cb = devlink_sb_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_SB_GET; + yds.rsp_cmd = 13; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_GET, 1); @@ -987,7 +987,7 @@ devlink_sb_pool_get(struct ynl_sock *ys, struct devlink_sb_pool_get_req *req) rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_sb_pool_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_SB_POOL_GET; + yrs.rsp_cmd = 17; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -1026,7 +1026,7 @@ devlink_sb_pool_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_sb_pool_get_list); yds.cb = devlink_sb_pool_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_SB_POOL_GET; + yds.rsp_cmd = 17; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_POOL_GET, 1); @@ -1147,7 +1147,7 @@ devlink_sb_port_pool_get(struct ynl_sock *ys, rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_sb_port_pool_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_SB_PORT_POOL_GET; + yrs.rsp_cmd = 21; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -1187,7 +1187,7 @@ devlink_sb_port_pool_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_sb_port_pool_get_list); yds.cb = devlink_sb_port_pool_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_SB_PORT_POOL_GET; + yds.rsp_cmd = 21; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_PORT_POOL_GET, 1); @@ -1316,7 +1316,7 @@ devlink_sb_tc_pool_bind_get(struct ynl_sock *ys, rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_sb_tc_pool_bind_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET; + yrs.rsp_cmd = 25; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -1356,7 +1356,7 @@ devlink_sb_tc_pool_bind_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_sb_tc_pool_bind_get_list); yds.cb = devlink_sb_tc_pool_bind_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET; + yds.rsp_cmd = 25; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_SB_TC_POOL_BIND_GET, 1); @@ -2183,7 +2183,7 @@ devlink_trap_get(struct ynl_sock *ys, struct devlink_trap_get_req *req) rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_trap_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_TRAP_GET; + yrs.rsp_cmd = 63; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -2223,7 +2223,7 @@ devlink_trap_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_trap_get_list); yds.cb = devlink_trap_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_TRAP_GET; + yds.rsp_cmd = 63; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_TRAP_GET, 1); @@ -2336,7 +2336,7 @@ devlink_trap_group_get(struct ynl_sock *ys, rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_trap_group_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_TRAP_GROUP_GET; + yrs.rsp_cmd = 67; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -2376,7 +2376,7 @@ devlink_trap_group_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_trap_group_get_list); yds.cb = devlink_trap_group_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_TRAP_GROUP_GET; + yds.rsp_cmd = 67; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_TRAP_GROUP_GET, 1); @@ -2483,7 +2483,7 @@ devlink_trap_policer_get(struct ynl_sock *ys, rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_trap_policer_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_TRAP_POLICER_GET; + yrs.rsp_cmd = 71; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -2523,7 +2523,7 @@ devlink_trap_policer_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_trap_policer_get_list); yds.cb = devlink_trap_policer_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_TRAP_POLICER_GET; + yds.rsp_cmd = 71; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_TRAP_POLICER_GET, 1); @@ -2642,7 +2642,7 @@ devlink_rate_get(struct ynl_sock *ys, struct devlink_rate_get_req *req) rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_rate_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_RATE_GET; + yrs.rsp_cmd = 76; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -2682,7 +2682,7 @@ devlink_rate_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_rate_get_list); yds.cb = devlink_rate_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_RATE_GET; + yds.rsp_cmd = 76; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_RATE_GET, 1); @@ -2786,7 +2786,7 @@ devlink_linecard_get(struct ynl_sock *ys, struct devlink_linecard_get_req *req) rsp = calloc(1, sizeof(*rsp)); yrs.yarg.data = rsp; yrs.cb = devlink_linecard_get_rsp_parse; - yrs.rsp_cmd = DEVLINK_CMD_LINECARD_GET; + yrs.rsp_cmd = 80; err = ynl_exec(ys, nlh, &yrs); if (err < 0) @@ -2825,7 +2825,7 @@ devlink_linecard_get_dump(struct ynl_sock *ys, yds.ys = ys; yds.alloc_sz = sizeof(struct devlink_linecard_get_list); yds.cb = devlink_linecard_get_rsp_parse; - yds.rsp_cmd = DEVLINK_CMD_LINECARD_GET; + yds.rsp_cmd = 80; yds.rsp_policy = &devlink_nest; nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_LINECARD_GET, 1); From a258c804aa8742763dce694b5e992d7ccf4294f2 Mon Sep 17 00:00:00 2001 From: Mateusz Polchlopek Date: Thu, 12 Oct 2023 08:31:44 -0400 Subject: [PATCH 200/661] docs: fix info about representor identification Update the "How are representors identified?" documentation subchapter. For newer kernels driver should use SET_NETDEV_DEVLINK_PORT instead of ndo_get_devlink_port() callback. Fixes: 7712b3e966ea ("Merge branch 'net-fix-netdev-to-devlink_port-linkage-and-expose-to-user'") Signed-off-by: Mateusz Polchlopek Reviewed-by: Wojciech Drewek Reviewed-by: Przemek Kitszel Reviewed-by: Edward Cree Link: https://lore.kernel.org/r/20231012123144.15768-1-mateusz.polchlopek@intel.com Signed-off-by: Jakub Kicinski --- Documentation/networking/representors.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/representors.rst b/Documentation/networking/representors.rst index ee1f5cd54496e9..decb39c19b9ed2 100644 --- a/Documentation/networking/representors.rst +++ b/Documentation/networking/representors.rst @@ -162,9 +162,11 @@ How are representors identified? The representor netdevice should *not* directly refer to a PCIe device (e.g. through ``net_dev->dev.parent`` / ``SET_NETDEV_DEV()``), either of the representee or of the switchdev function. -Instead, it should implement the ``ndo_get_devlink_port()`` netdevice op, which -the kernel uses to provide the ``phys_switch_id`` and ``phys_port_name`` sysfs -nodes. (Some legacy drivers implement ``ndo_get_port_parent_id()`` and +Instead, the driver should use the ``SET_NETDEV_DEVLINK_PORT`` macro to +assign a devlink port instance to the netdevice before registering the +netdevice; the kernel uses the devlink port to provide the ``phys_switch_id`` +and ``phys_port_name`` sysfs nodes. +(Some legacy drivers implement ``ndo_get_port_parent_id()`` and ``ndo_get_phys_port_name()`` directly, but this is deprecated.) See :ref:`Documentation/networking/devlink/devlink-port.rst ` for the details of this API. From 2c0d808f36cc6e0617f9dda055a6651c777a9d64 Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Thu, 12 Oct 2023 12:16:26 +0530 Subject: [PATCH 201/661] net: ti: icssg-prueth: Fix tx_total_bytes count ICSSG HW stats on TX side considers 8 preamble bytes as data bytes. Due to this the tx_bytes of ICSSG interface doesn't match the rx_bytes of the link partner. There is no public errata available yet. As a workaround to fix this, decrease tx_bytes by 8 bytes for every tx frame. Fixes: c1e10d5dc7a1 ("net: ti: icssg-prueth: Add ICSSG Stats") Signed-off-by: MD Danish Anwar Link: https://lore.kernel.org/r/20231012064626.977466-1-danishanwar@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_stats.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/ti/icssg/icssg_stats.c b/drivers/net/ethernet/ti/icssg/icssg_stats.c index bb0b33927e3b82..3dbadddd7e355b 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_stats.c +++ b/drivers/net/ethernet/ti/icssg/icssg_stats.c @@ -9,6 +9,9 @@ #include "icssg_stats.h" #include +#define ICSSG_TX_PACKET_OFFSET 0xA0 +#define ICSSG_TX_BYTE_OFFSET 0xEC + static u32 stats_base[] = { 0x54c, /* Slice 0 stats start */ 0xb18, /* Slice 1 stats start */ }; @@ -18,6 +21,7 @@ void emac_update_hardware_stats(struct prueth_emac *emac) struct prueth *prueth = emac->prueth; int slice = prueth_emac_slice(emac); u32 base = stats_base[slice]; + u32 tx_pkt_cnt = 0; u32 val; int i; @@ -29,7 +33,12 @@ void emac_update_hardware_stats(struct prueth_emac *emac) base + icssg_all_stats[i].offset, val); + if (icssg_all_stats[i].offset == ICSSG_TX_PACKET_OFFSET) + tx_pkt_cnt = val; + emac->stats[i] += val; + if (icssg_all_stats[i].offset == ICSSG_TX_BYTE_OFFSET) + emac->stats[i] -= tx_pkt_cnt * 8; } } From fc6f716a5069180c40a8c9b63631e97da34f64a3 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Wed, 11 Oct 2023 16:33:32 -0700 Subject: [PATCH 202/661] i40e: prevent crash on probe if hw registers have invalid values The hardware provides the indexes of the first and the last available queue and VF. From the indexes, the driver calculates the numbers of queues and VFs. In theory, a faulty device might say the last index is smaller than the first index. In that case, the driver's calculation would underflow, it would attempt to write to non-existent registers outside of the ioremapped range and crash. I ran into this not by having a faulty device, but by an operator error. I accidentally ran a QE test meant for i40e devices on an ice device. The test used 'echo i40e > /sys/...ice PCI device.../driver_override', bound the driver to the device and crashed in one of the wr32 calls in i40e_clear_hw. Add checks to prevent underflows in the calculations of num_queues and num_vfs. With this fix, the wrong device probing reports errors and returns a failure without crashing. Fixes: 838d41d92a90 ("i40e: clear all queues and interrupts") Signed-off-by: Michal Schmidt Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Link: https://lore.kernel.org/r/20231011233334.336092-2-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index eeef20f7710625..1b493854f52292 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1082,7 +1082,7 @@ void i40e_clear_hw(struct i40e_hw *hw) I40E_PFLAN_QALLOC_FIRSTQ_SHIFT; j = (val & I40E_PFLAN_QALLOC_LASTQ_MASK) >> I40E_PFLAN_QALLOC_LASTQ_SHIFT; - if (val & I40E_PFLAN_QALLOC_VALID_MASK) + if (val & I40E_PFLAN_QALLOC_VALID_MASK && j >= base_queue) num_queues = (j - base_queue) + 1; else num_queues = 0; @@ -1092,7 +1092,7 @@ void i40e_clear_hw(struct i40e_hw *hw) I40E_PF_VT_PFALLOC_FIRSTVF_SHIFT; j = (val & I40E_PF_VT_PFALLOC_LASTVF_MASK) >> I40E_PF_VT_PFALLOC_LASTVF_SHIFT; - if (val & I40E_PF_VT_PFALLOC_VALID_MASK) + if (val & I40E_PF_VT_PFALLOC_VALID_MASK && j >= i) num_vfs = (j - i) + 1; else num_vfs = 0; From 0288c3e709e5fabd51e84715c5c798a02f43061a Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Wed, 11 Oct 2023 16:33:33 -0700 Subject: [PATCH 203/661] ice: reset first in crash dump kernels When the system boots into the crash dump kernel after a panic, the ice networking device may still have pending transactions that can cause errors or machine checks when the device is re-enabled. This can prevent the crash dump kernel from loading the driver or collecting the crash data. To avoid this issue, perform a function level reset (FLR) on the ice device via PCIe config space before enabling it on the crash kernel. This will clear any outstanding transactions and stop all queues and interrupts. Restore the config space after the FLR, otherwise it was found in testing that the driver wouldn't load successfully. The following sequence causes the original issue: - Load the ice driver with modprobe ice - Enable SR-IOV with 2 VFs: echo 2 > /sys/class/net/eth0/device/sriov_num_vfs - Trigger a crash with echo c > /proc/sysrq-trigger - Load the ice driver again (or let it load automatically) with modprobe ice - The system crashes again during pcim_enable_device() Fixes: 837f08fdecbe ("ice: Add basic driver framework for Intel(R) E800 Series") Reported-by: Vishal Agrawal Reviewed-by: Jay Vosburgh Reviewed-by: Przemek Kitszel Signed-off-by: Jesse Brandeburg Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Link: https://lore.kernel.org/r/20231011233334.336092-3-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c8286adae94629..6550c46e4e369b 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include "ice.h" #include "ice_base.h" #include "ice_lib.h" @@ -5014,6 +5015,20 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) return -EINVAL; } + /* when under a kdump kernel initiate a reset before enabling the + * device in order to clear out any pending DMA transactions. These + * transactions can cause some systems to machine check when doing + * the pcim_enable_device() below. + */ + if (is_kdump_kernel()) { + pci_save_state(pdev); + pci_clear_master(pdev); + err = pcie_flr(pdev); + if (err) + return err; + pci_restore_state(pdev); + } + /* this driver uses devres, see * Documentation/driver-api/driver-model/devres.rst */ From 42066c4d5d344cdf8564556cdbe0aa36854fefa4 Mon Sep 17 00:00:00 2001 From: Mateusz Pacuszka Date: Wed, 11 Oct 2023 16:33:34 -0700 Subject: [PATCH 204/661] ice: Fix safe mode when DDP is missing One thing is broken in the safe mode, that is ice_deinit_features() is being executed even that ice_init_features() was not causing stack trace during pci_unregister_driver(). Add check on the top of the function. Fixes: 5b246e533d01 ("ice: split probe into smaller functions") Signed-off-by: Mateusz Pacuszka Signed-off-by: Jan Sokolowski Reviewed-by: Przemek Kitszel Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Link: https://lore.kernel.org/r/20231011233334.336092-4-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 6550c46e4e369b..7784135160fd20 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4684,6 +4684,9 @@ static void ice_init_features(struct ice_pf *pf) static void ice_deinit_features(struct ice_pf *pf) { + if (ice_is_safe_mode(pf)) + return; + ice_deinit_lag(pf); if (test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)) ice_cfg_lldp_mib_change(&pf->hw, false); From b541260615f601ae1b5d6d0cc54e790de706303b Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 5 Oct 2023 13:59:59 -0700 Subject: [PATCH 205/661] Bluetooth: hci_event: Fix using memcmp when comparing keys memcmp is not consider safe to use with cryptographic secrets: 'Do not use memcmp() to compare security critical data, such as cryptographic secrets, because the required CPU time depends on the number of equal bytes.' While usage of memcmp for ZERO_KEY may not be considered a security critical data, it can lead to more usage of memcmp with pairing keys which could introduce more security problems. Fixes: 455c2ff0a558 ("Bluetooth: Fix BR/EDR out-of-band pairing with only initiator data") Fixes: 33155c4aae52 ("Bluetooth: hci_event: Ignore NULL link key") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 7e1c875e0e888e..9b252a65a7cd7c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -26,6 +26,8 @@ /* Bluetooth HCI event handling. */ #include +#include +#include #include #include @@ -4754,7 +4756,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, void *data, goto unlock; /* Ignore NULL link key against CVE-2020-26555 */ - if (!memcmp(ev->link_key, ZERO_KEY, HCI_LINK_KEY_SIZE)) { + if (!crypto_memneq(ev->link_key, ZERO_KEY, HCI_LINK_KEY_SIZE)) { bt_dev_dbg(hdev, "Ignore NULL link key (ZERO KEY) for %pMR", &ev->bdaddr); hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); @@ -5294,8 +5296,8 @@ static u8 bredr_oob_data_present(struct hci_conn *conn) * available, then do not declare that OOB data is * present. */ - if (!memcmp(data->rand256, ZERO_KEY, 16) || - !memcmp(data->hash256, ZERO_KEY, 16)) + if (!crypto_memneq(data->rand256, ZERO_KEY, 16) || + !crypto_memneq(data->hash256, ZERO_KEY, 16)) return 0x00; return 0x02; @@ -5305,8 +5307,8 @@ static u8 bredr_oob_data_present(struct hci_conn *conn) * not supported by the hardware, then check that if * P-192 data values are present. */ - if (!memcmp(data->rand192, ZERO_KEY, 16) || - !memcmp(data->hash192, ZERO_KEY, 16)) + if (!crypto_memneq(data->rand192, ZERO_KEY, 16) || + !crypto_memneq(data->hash192, ZERO_KEY, 16)) return 0x00; return 0x01; From 35d91d95a0cd61ebb90e0246dc917fd25e519b8c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 5 Oct 2023 14:12:19 -0700 Subject: [PATCH 206/661] Bluetooth: hci_event: Fix coding style This fixes the following code style problem: ERROR: that open brace { should be on the previous line + if (!bacmp(&hdev->bdaddr, &ev->bdaddr)) + { Fixes: 1ffc6f8cc332 ("Bluetooth: Reject connection with the device which has same BD_ADDR") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 9b252a65a7cd7c..7cb41ac1c2582e 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3273,8 +3273,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, /* Reject incoming connection from device with same BD ADDR against * CVE-2020-26555 */ - if (!bacmp(&hdev->bdaddr, &ev->bdaddr)) - { + if (!bacmp(&hdev->bdaddr, &ev->bdaddr)) { bt_dev_dbg(hdev, "Reject connection with same BD_ADDR %pMR\n", &ev->bdaddr); hci_reject_conn(hdev, &ev->bdaddr); From 9ee252868787ab5a26012d69e335c7371f54563a Mon Sep 17 00:00:00 2001 From: Max Chou Date: Fri, 6 Oct 2023 10:47:07 +0800 Subject: [PATCH 207/661] Bluetooth: btrtl: Ignore error return for hci_devcd_register() If CONFIG_DEV_COREDUMP was not set, it would return -EOPNOTSUPP for hci_devcd_register(). In this commit, ignore error return for hci_devcd_register(). Otherwise Bluetooth initialization will be failed. Fixes: 044014ce85a1 ("Bluetooth: btrtl: Add Realtek devcoredump support") Cc: stable@vger.kernel.org Reported-by: Kirill A. Shutemov Closes: https://lore.kernel.org/all/ZRyqIn0_qqEFBPdy@debian.me/T/ Signed-off-by: Max Chou Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btrtl.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 84c2c2e1122fb6..277d039ecbb429 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -962,13 +962,10 @@ static void btrtl_dmp_hdr(struct hci_dev *hdev, struct sk_buff *skb) skb_put_data(skb, buf, strlen(buf)); } -static int btrtl_register_devcoredump_support(struct hci_dev *hdev) +static void btrtl_register_devcoredump_support(struct hci_dev *hdev) { - int err; + hci_devcd_register(hdev, btrtl_coredump, btrtl_dmp_hdr, NULL); - err = hci_devcd_register(hdev, btrtl_coredump, btrtl_dmp_hdr, NULL); - - return err; } void btrtl_set_driver_name(struct hci_dev *hdev, const char *driver_name) @@ -1255,8 +1252,7 @@ int btrtl_download_firmware(struct hci_dev *hdev, } done: - if (!err) - err = btrtl_register_devcoredump_support(hdev); + btrtl_register_devcoredump_support(hdev); return err; } From 18f547f3fc074500ab5d419cf482240324e73a7e Mon Sep 17 00:00:00 2001 From: Edward AD Date: Tue, 10 Oct 2023 13:36:57 +0800 Subject: [PATCH 208/661] Bluetooth: hci_sock: fix slab oob read in create_monitor_event When accessing hdev->name, the actual string length should prevail Reported-by: syzbot+c90849c50ed209d77689@syzkaller.appspotmail.com Fixes: dcda165706b9 ("Bluetooth: hci_core: Fix build warnings") Signed-off-by: Edward AD Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 5e4f718073b7ea..72abe54c45dd36 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -488,7 +488,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) ni->type = hdev->dev_type; ni->bus = hdev->bus; bacpy(&ni->bdaddr, &hdev->bdaddr); - memcpy(ni->name, hdev->name, 8); + memcpy(ni->name, hdev->name, strlen(hdev->name)); opcode = cpu_to_le16(HCI_MON_NEW_INDEX); break; From 9d1a3c74746428102d55371fbf74b484733937d9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 9 Oct 2023 22:31:31 +0200 Subject: [PATCH 209/661] Bluetooth: avoid memcmp() out of bounds warning bacmp() is a wrapper around memcpy(), which contain compile-time checks for buffer overflow. Since the hci_conn_request_evt() also calls bt_dev_dbg() with an implicit NULL pointer check, the compiler is now aware of a case where 'hdev' is NULL and treats this as meaning that zero bytes are available: In file included from net/bluetooth/hci_event.c:32: In function 'bacmp', inlined from 'hci_conn_request_evt' at net/bluetooth/hci_event.c:3276:7: include/net/bluetooth/bluetooth.h:364:16: error: 'memcmp' specified bound 6 exceeds source size 0 [-Werror=stringop-overread] 364 | return memcmp(ba1, ba2, sizeof(bdaddr_t)); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Add another NULL pointer check before the bacmp() to ensure the compiler understands the code flow enough to not warn about it. Since the patch that introduced the warning is marked for stable backports, this one should also go that way to avoid introducing build regressions. Fixes: 1ffc6f8cc332 ("Bluetooth: Reject connection with the device which has same BD_ADDR") Cc: Kees Cook Cc: "Lee, Chun-Yi" Cc: Luiz Augusto von Dentz Cc: Marcel Holtmann Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: Kees Cook Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 7cb41ac1c2582e..1e1c9147356c3c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3273,7 +3273,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, /* Reject incoming connection from device with same BD ADDR against * CVE-2020-26555 */ - if (!bacmp(&hdev->bdaddr, &ev->bdaddr)) { + if (hdev && !bacmp(&hdev->bdaddr, &ev->bdaddr)) { bt_dev_dbg(hdev, "Reject connection with same BD_ADDR %pMR\n", &ev->bdaddr); hci_reject_conn(hdev, &ev->bdaddr); From cb3871b1cd135a6662b732fbc6b3db4afcdb4a64 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 11 Oct 2023 09:31:44 -0700 Subject: [PATCH 210/661] Bluetooth: hci_sock: Correctly bounds check and pad HCI_MON_NEW_INDEX name The code pattern of memcpy(dst, src, strlen(src)) is almost always wrong. In this case it is wrong because it leaves memory uninitialized if it is less than sizeof(ni->name), and overflows ni->name when longer. Normally strtomem_pad() could be used here, but since ni->name is a trailing array in struct hci_mon_new_index, compilers that don't support -fstrict-flex-arrays=3 can't tell how large this array is via __builtin_object_size(). Instead, open-code the helper and use sizeof() since it will work correctly. Additionally mark ni->name as __nonstring since it appears to not be a %NUL terminated C string. Cc: Luiz Augusto von Dentz Cc: Edward AD Cc: Marcel Holtmann Cc: Johan Hedberg Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: linux-bluetooth@vger.kernel.org Cc: netdev@vger.kernel.org Fixes: 18f547f3fc07 ("Bluetooth: hci_sock: fix slab oob read in create_monitor_event") Link: https://lore.kernel.org/lkml/202310110908.F2639D3276@keescook/ Signed-off-by: Kees Cook Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_mon.h | 2 +- net/bluetooth/hci_sock.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h index 2d5fcda1bcd057..082f89531b8896 100644 --- a/include/net/bluetooth/hci_mon.h +++ b/include/net/bluetooth/hci_mon.h @@ -56,7 +56,7 @@ struct hci_mon_new_index { __u8 type; __u8 bus; bdaddr_t bdaddr; - char name[8]; + char name[8] __nonstring; } __packed; #define HCI_MON_NEW_INDEX_SIZE 16 diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 72abe54c45dd36..3e7cd330d731ac 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -488,7 +488,8 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) ni->type = hdev->dev_type; ni->bus = hdev->bus; bacpy(&ni->bdaddr, &hdev->bdaddr); - memcpy(ni->name, hdev->name, strlen(hdev->name)); + memcpy_and_pad(ni->name, sizeof(ni->name), hdev->name, + strnlen(hdev->name, sizeof(ni->name)), '\0'); opcode = cpu_to_le16(HCI_MON_NEW_INDEX); break; From 865b080e3229102f160889328ce2e8e97aa65ea0 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 9 Oct 2023 12:14:12 +0200 Subject: [PATCH 211/661] iio: exynos-adc: request second interupt only when touchscreen mode is used Second interrupt is needed only when touchscreen mode is used, so don't request it unconditionally. This removes the following annoying warning during boot: exynos-adc 14d10000.adc: error -ENXIO: IRQ index 1 not found Fixes: 2bb8ad9b44c5 ("iio: exynos-adc: add experimental touchscreen support") Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20231009101412.916922-1-m.szyprowski@samsung.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/exynos_adc.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/iio/adc/exynos_adc.c b/drivers/iio/adc/exynos_adc.c index cff1ba57fb16a7..43c8af41b4a9d8 100644 --- a/drivers/iio/adc/exynos_adc.c +++ b/drivers/iio/adc/exynos_adc.c @@ -826,16 +826,26 @@ static int exynos_adc_probe(struct platform_device *pdev) } } + /* leave out any TS related code if unreachable */ + if (IS_REACHABLE(CONFIG_INPUT)) { + has_ts = of_property_read_bool(pdev->dev.of_node, + "has-touchscreen") || pdata; + } + irq = platform_get_irq(pdev, 0); if (irq < 0) return irq; info->irq = irq; - irq = platform_get_irq(pdev, 1); - if (irq == -EPROBE_DEFER) - return irq; + if (has_ts) { + irq = platform_get_irq(pdev, 1); + if (irq == -EPROBE_DEFER) + return irq; - info->tsirq = irq; + info->tsirq = irq; + } else { + info->tsirq = -1; + } info->dev = &pdev->dev; @@ -900,12 +910,6 @@ static int exynos_adc_probe(struct platform_device *pdev) if (info->data->init_hw) info->data->init_hw(info); - /* leave out any TS related code if unreachable */ - if (IS_REACHABLE(CONFIG_INPUT)) { - has_ts = of_property_read_bool(pdev->dev.of_node, - "has-touchscreen") || pdata; - } - if (pdata) info->delay = pdata->delay; else From ff9e8f41513669e290f6e1904e1bc75950584491 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 24 Aug 2023 22:28:49 +1000 Subject: [PATCH 212/661] powerpc/mm: Allow ARCH_FORCE_MAX_ORDER up to 12 Christophe reported that the change to ARCH_FORCE_MAX_ORDER to limit the range to 10 had broken his ability to configure hugepages: # echo 1 > /sys/kernel/mm/hugepages/hugepages-8192kB/nr_hugepages sh: write error: Invalid argument Several of the powerpc defconfigs previously set the ARCH_FORCE_MAX_ORDER value to 12, via the definition in arch/powerpc/configs/fsl-emb-nonhw.config, used by: mpc85xx_defconfig mpc85xx_smp_defconfig corenet32_smp_defconfig corenet64_smp_defconfig mpc86xx_defconfig mpc86xx_smp_defconfig Fix it by increasing the allowed range to 12 to restore the previous behaviour. Fixes: 358e526a1648 ("powerpc/mm: Reinstate ARCH_FORCE_MAX_ORDER ranges") Reported-by: Christophe Leroy Closes: https://lore.kernel.org/all/8011d806-5b30-bf26-2bfe-a08c39d57e20@csgroup.eu/ Tested-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/20230824122849.942072-1-mpe@ellerman.id.au --- arch/powerpc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3aaadfd2c8eb24..d5d5388973ac76 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -910,7 +910,7 @@ config ARCH_FORCE_MAX_ORDER default "6" if PPC32 && PPC_64K_PAGES range 4 10 if PPC32 && PPC_256K_PAGES default "4" if PPC32 && PPC_256K_PAGES - range 10 10 + range 10 12 default "10" help The kernel page allocator limits the size of maximal physically From 2f3389c73832ad90b63208c0fc281ad080114c7a Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Fri, 13 Oct 2023 18:48:12 +0530 Subject: [PATCH 213/661] qed: fix LL2 RX buffer allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Driver allocates the LL2 rx buffers from kmalloc() area to construct the skb using slab_build_skb() The required size allocation seems to have overlooked for accounting both skb_shared_info size and device placement padding bytes which results into the below panic when doing skb_put() for a standard MTU sized frame. skbuff: skb_over_panic: text:ffffffffc0b0225f len:1514 put:1514 head:ff3dabceaf39c000 data:ff3dabceaf39c042 tail:0x62c end:0x566 dev: … skb_panic+0x48/0x4a skb_put.cold+0x10/0x10 qed_ll2b_complete_rx_packet+0x14f/0x260 [qed] qed_ll2_rxq_handle_completion.constprop.0+0x169/0x200 [qed] qed_ll2_rxq_completion+0xba/0x320 [qed] qed_int_sp_dpc+0x1a7/0x1e0 [qed] This patch fixes this by accouting skb_shared_info and device placement padding size bytes when allocating the buffers. Cc: David S. Miller Fixes: 0a7fb11c23c0 ("qed: Add Light L2 support") Signed-off-by: Manish Chopra Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 717a0b3f89bd53..ab5ef254a74832 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -113,7 +113,10 @@ static void qed_ll2b_complete_tx_packet(void *cxt, static int qed_ll2_alloc_buffer(struct qed_dev *cdev, u8 **data, dma_addr_t *phys_addr) { - *data = kmalloc(cdev->ll2->rx_size, GFP_ATOMIC); + size_t size = cdev->ll2->rx_size + NET_SKB_PAD + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + *data = kmalloc(size, GFP_ATOMIC); if (!(*data)) { DP_INFO(cdev, "Failed to allocate LL2 buffer data\n"); return -ENOMEM; @@ -2589,7 +2592,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) INIT_LIST_HEAD(&cdev->ll2->list); spin_lock_init(&cdev->ll2->lock); - cdev->ll2->rx_size = NET_SKB_PAD + ETH_HLEN + + cdev->ll2->rx_size = PRM_DMA_PAD_BYTES_NUM + ETH_HLEN + L1_CACHE_BYTES + params->mtu; /* Allocate memory for LL2. From 8a540e990d7da36813cb71a4a422712bfba448a4 Mon Sep 17 00:00:00 2001 From: Zygo Blaxell Date: Sat, 7 Oct 2023 01:14:21 -0400 Subject: [PATCH 214/661] btrfs: fix stripe length calculation for non-zoned data chunk allocation Commit f6fca3917b4d "btrfs: store chunk size in space-info struct" broke data chunk allocations on non-zoned multi-device filesystems when using default chunk_size. Commit 5da431b71d4b "btrfs: fix the max chunk size and stripe length calculation" partially fixed that, and this patch completes the fix for that case. After commit f6fca3917b4d and 5da431b71d4b, the sequence of events for a data chunk allocation on a non-zoned filesystem is: 1. btrfs_create_chunk calls init_alloc_chunk_ctl, which copies space_info->chunk_size (default 10 GiB) to ctl->max_stripe_len unmodified. Before f6fca3917b4d, ctl->max_stripe_len value was 1 GiB for non-zoned data chunks and not configurable. 2. btrfs_create_chunk calls gather_device_info which consumes and produces more fields of chunk_ctl. 3. gather_device_info multiplies ctl->max_stripe_len by ctl->dev_stripes (which is 1 in all cases except dup) and calls find_free_dev_extent with that number as num_bytes. 4. find_free_dev_extent locates the first dev_extent hole on a device which is at least as large as num_bytes. With default max_chunk_size from f6fca3917b4d, it finds the first hole which is longer than 10 GiB, or the largest hole if that hole is shorter than 10 GiB. This is different from the pre-f6fca3917b4d behavior, where num_bytes is 1 GiB, and find_free_dev_extent may choose a different hole. 5. gather_device_info repeats step 4 with all devices to find the first or largest dev_extent hole that can be allocated on each device. 6. gather_device_info sorts the device list by the hole size on each device, using total unallocated space on each device to break ties, then returns to btrfs_create_chunk with the list. 7. btrfs_create_chunk calls decide_stripe_size_regular. 8. decide_stripe_size_regular finds the largest stripe_len that fits across the first nr_devs device dev_extent holes that were found by gather_device_info (and satisfies other constraints on stripe_len that are not relevant here). 9. decide_stripe_size_regular caps the length of the stripe it computed at 1 GiB. This cap appeared in 5da431b71d4b to correct one of the other regressions introduced in f6fca3917b4d. 10. btrfs_create_chunk creates a new chunk with the above computed size and number of devices. At step 4, gather_device_info() has found a location where stripe up to 10 GiB in length could be allocated on several devices, and selected which devices should have a dev_extent allocated on them, but at step 9, only 1 GiB of the space that was found on each device can be used. This mismatch causes new suboptimal chunk allocation cases that did not occur in pre-f6fca3917b4d kernels. Consider a filesystem using raid1 profile with 3 devices. After some balances, device 1 has 10x 1 GiB unallocated space, while devices 2 and 3 have 1x 10 GiB unallocated space, i.e. the same total amount of space, but distributed across different numbers of dev_extent holes. For visualization, let's ignore all the chunks that were allocated before this point, and focus on the remaining holes: Device 1: [_] [_] [_] [_] [_] [_] [_] [_] [_] [_] (10x 1 GiB unallocated) Device 2: [__________] (10 GiB contig unallocated) Device 3: [__________] (10 GiB contig unallocated) Before f6fca3917b4d, the allocator would fill these optimally by allocating chunks with dev_extents on devices 1 and 2 ([12]), 1 and 3 ([13]), or 2 and 3 ([23]): [after 0 chunk allocations] Device 1: [_] [_] [_] [_] [_] [_] [_] [_] [_] [_] (10 GiB) Device 2: [__________] (10 GiB) Device 3: [__________] (10 GiB) [after 1 chunk allocation] Device 1: [12] [_] [_] [_] [_] [_] [_] [_] [_] [_] Device 2: [12] [_________] (9 GiB) Device 3: [__________] (10 GiB) [after 2 chunk allocations] Device 1: [12] [13] [_] [_] [_] [_] [_] [_] [_] [_] (8 GiB) Device 2: [12] [_________] (9 GiB) Device 3: [13] [_________] (9 GiB) [after 3 chunk allocations] Device 1: [12] [13] [12] [_] [_] [_] [_] [_] [_] [_] (7 GiB) Device 2: [12] [12] [________] (8 GiB) Device 3: [13] [_________] (9 GiB) [...] [after 12 chunk allocations] Device 1: [12] [13] [12] [13] [12] [13] [12] [13] [_] [_] (2 GiB) Device 2: [12] [12] [23] [23] [12] [12] [23] [23] [__] (2 GiB) Device 3: [13] [13] [23] [23] [13] [23] [13] [23] [__] (2 GiB) [after 13 chunk allocations] Device 1: [12] [13] [12] [13] [12] [13] [12] [13] [12] [_] (1 GiB) Device 2: [12] [12] [23] [23] [12] [12] [23] [23] [12] [_] (1 GiB) Device 3: [13] [13] [23] [23] [13] [23] [13] [23] [__] (2 GiB) [after 14 chunk allocations] Device 1: [12] [13] [12] [13] [12] [13] [12] [13] [12] [13] (full) Device 2: [12] [12] [23] [23] [12] [12] [23] [23] [12] [_] (1 GiB) Device 3: [13] [13] [23] [23] [13] [23] [13] [23] [13] [_] (1 GiB) [after 15 chunk allocations] Device 1: [12] [13] [12] [13] [12] [13] [12] [13] [12] [13] (full) Device 2: [12] [12] [23] [23] [12] [12] [23] [23] [12] [23] (full) Device 3: [13] [13] [23] [23] [13] [23] [13] [23] [13] [23] (full) This allocates all of the space with no waste. The sorting function used by gather_device_info considers free space holes above 1 GiB in length to be equal to 1 GiB, so once find_free_dev_extent locates a sufficiently long hole on each device, all the holes appear equal in the sort, and the comparison falls back to sorting devices by total free space. This keeps usable space on each device equal so they can all be filled completely. After f6fca3917b4d, the allocator prefers the devices with larger holes over the devices with more free space, so it makes bad allocation choices: [after 1 chunk allocation] Device 1: [_] [_] [_] [_] [_] [_] [_] [_] [_] [_] (10 GiB) Device 2: [23] [_________] (9 GiB) Device 3: [23] [_________] (9 GiB) [after 2 chunk allocations] Device 1: [_] [_] [_] [_] [_] [_] [_] [_] [_] [_] (10 GiB) Device 2: [23] [23] [________] (8 GiB) Device 3: [23] [23] [________] (8 GiB) [after 3 chunk allocations] Device 1: [_] [_] [_] [_] [_] [_] [_] [_] [_] [_] (10 GiB) Device 2: [23] [23] [23] [_______] (7 GiB) Device 3: [23] [23] [23] [_______] (7 GiB) [...] [after 9 chunk allocations] Device 1: [_] [_] [_] [_] [_] [_] [_] [_] [_] [_] (10 GiB) Device 2: [23] [23] [23] [23] [23] [23] [23] [23] [23] [_] (1 GiB) Device 3: [23] [23] [23] [23] [23] [23] [23] [23] [23] [_] (1 GiB) [after 10 chunk allocations] Device 1: [12] [_] [_] [_] [_] [_] [_] [_] [_] [_] (9 GiB) Device 2: [23] [23] [23] [23] [23] [23] [23] [23] [12] (full) Device 3: [23] [23] [23] [23] [23] [23] [23] [23] [_] (1 GiB) [after 11 chunk allocations] Device 1: [12] [13] [_] [_] [_] [_] [_] [_] [_] [_] (8 GiB) Device 2: [23] [23] [23] [23] [23] [23] [23] [23] [12] (full) Device 3: [23] [23] [23] [23] [23] [23] [23] [23] [13] (full) No further allocations are possible, with 8 GiB wasted (4 GiB of data space). The sort in gather_device_info now considers free space in holes longer than 1 GiB to be distinct, so it will prefer devices 2 and 3 over device 1 until all but 1 GiB is allocated on devices 2 and 3. At that point, with only 1 GiB unallocated on every device, the largest hole length on each device is equal at 1 GiB, so the sort finally moves to ordering the devices with the most free space, but by this time it is too late to make use of the free space on device 1. Note that it's possible to contrive a case where the pre-f6fca3917b4d allocator fails the same way, but these cases generally have extensive dev_extent fragmentation as a precondition (e.g. many holes of 768M in length on one device, and few holes 1 GiB in length on the others). With the regression in f6fca3917b4d, bad chunk allocation can occur even under optimal conditions, when all dev_extent holes are exact multiples of stripe_len in length, as in the example above. Also note that post-f6fca3917b4d kernels do treat dev_extent holes larger than 10 GiB as equal, so the bad behavior won't show up on a freshly formatted filesystem; however, as the filesystem ages and fills up, and holes ranging from 1 GiB to 10 GiB in size appear, the problem can show up as a failure to balance after adding or removing devices, or an unexpected shortfall in available space due to unequal allocation. To fix the regression and make data chunk allocation work again, set ctl->max_stripe_len back to the original SZ_1G, or space_info->chunk_size if that's smaller (the latter can happen if the user set space_info->chunk_size to less than 1 GiB via sysfs, or it's a 32 MiB system chunk with a hardcoded chunk_size and stripe_len). While researching the background of the earlier commits, I found that an identical fix was already proposed at: https://lore.kernel.org/linux-btrfs/de83ac46-a4a3-88d3-85ce-255b7abc5249@gmx.com/ The previous review missed one detail: ctl->max_stripe_len is used before decide_stripe_size_regular() is called, when it is too late for the changes in that function to have any effect. ctl->max_stripe_len is not used directly by decide_stripe_size_regular(), but the parameter does heavily influence the per-device free space data presented to the function. Fixes: f6fca3917b4d ("btrfs: store chunk size in space-info struct") CC: stable@vger.kernel.org # 6.1+ Link: https://lore.kernel.org/linux-btrfs/20231007051421.19657-1-ce3g8jdj@umail.furryterror.org/ Reviewed-by: Qu Wenruo Signed-off-by: Zygo Blaxell Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e3a3769fd92e8c..43fee429834a8c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5111,7 +5111,7 @@ static void init_alloc_chunk_ctl_policy_regular( ASSERT(space_info); ctl->max_chunk_size = READ_ONCE(space_info->chunk_size); - ctl->max_stripe_size = ctl->max_chunk_size; + ctl->max_stripe_size = min_t(u64, ctl->max_chunk_size, SZ_1G); if (ctl->type & BTRFS_BLOCK_GROUP_SYSTEM) ctl->devs_max = min_t(int, ctl->devs_max, BTRFS_MAX_DEVS_SYS_CHUNK); From fc8b2a619469378717e7270d2a4e1ef93c585f7a Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 11 Oct 2023 10:01:14 -0400 Subject: [PATCH 215/661] net: more strict VIRTIO_NET_HDR_GSO_UDP_L4 validation Syzbot reported two new paths to hit an internal WARNING using the new virtio gso type VIRTIO_NET_HDR_GSO_UDP_L4. RIP: 0010:skb_checksum_help+0x4a2/0x600 net/core/dev.c:3260 skb len=64521 gso_size=344 and RIP: 0010:skb_warn_bad_offload+0x118/0x240 net/core/dev.c:3262 Older virtio types have historically had loose restrictions, leading to many entirely impractical fuzzer generated packets causing problems deep in the kernel stack. Ideally, we would have had strict validation for all types from the start. New virtio types can have tighter validation. Limit UDP GSO packets inserted via virtio to the same limits imposed by the UDP_SEGMENT socket interface: 1. must use checksum offload 2. checksum offload matches UDP header 3. no more segments than UDP_MAX_SEGMENTS 4. UDP GSO does not take modifier flags, notably SKB_GSO_TCP_ECN Fixes: 860b7f27b8f7 ("linux/virtio_net.h: Support USO offload in vnet header.") Reported-by: syzbot+01cdbc31e9c0ae9b33ac@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/0000000000005039270605eb0b7f@google.com/ Reported-by: syzbot+c99d835ff081ca30f986@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/0000000000005426680605eb0b9f@google.com/ Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Acked-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 7b4dd69555e497..27cc1d4643219a 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -3,8 +3,8 @@ #define _LINUX_VIRTIO_NET_H #include +#include #include -#include #include static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) @@ -151,9 +151,22 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, unsigned int nh_off = p_off; struct skb_shared_info *shinfo = skb_shinfo(skb); - /* UFO may not include transport header in gso_size. */ - if (gso_type & SKB_GSO_UDP) + switch (gso_type & ~SKB_GSO_TCP_ECN) { + case SKB_GSO_UDP: + /* UFO may not include transport header in gso_size. */ nh_off -= thlen; + break; + case SKB_GSO_UDP_L4: + if (!(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) + return -EINVAL; + if (skb->csum_offset != offsetof(struct udphdr, check)) + return -EINVAL; + if (skb->len - p_off > gso_size * UDP_MAX_SEGMENTS) + return -EINVAL; + if (gso_type != SKB_GSO_UDP_L4) + return -EINVAL; + break; + } /* Kernel has a special handling for GSO_BY_FRAGS. */ if (gso_size == GSO_BY_FRAGS) From 92e37f20f20a23fec4626ae72eda50f127acb130 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 11 Oct 2023 15:49:36 -0400 Subject: [PATCH 216/661] selftests: openvswitch: Add version check for pyroute2 Paolo Abeni reports that on some systems the pyroute2 version isn't new enough to run the test suite. Ensure that we support a minimum version of 0.6 for all cases (which does include the existing ones). The 0.6.1 version was released in May of 2021, so should be propagated to most installations at this point. The alternative that Paolo proposed was to only skip when the add-flow is being run. This would be okay for most cases, except if a future test case is added that needs to do flow dump without an associated add (just guessing). In that case, it could also be broken and we would need additional skip logic anyway. Just draw a line in the sand now. Fixes: 25f16c873fb1 ("selftests: add openvswitch selftest suite") Reported-by: Paolo Abeni Closes: https://lore.kernel.org/lkml/8470c431e0930d2ea204a9363a60937289b7fdbe.camel@redhat.com/ Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- tools/testing/selftests/net/openvswitch/openvswitch.sh | 2 +- tools/testing/selftests/net/openvswitch/ovs-dpctl.py | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 9c2012d70b08ec..220c3356901ef1 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -525,7 +525,7 @@ run_test() { fi if python3 ovs-dpctl.py -h 2>&1 | \ - grep "Need to install the python" >/dev/null 2>&1; then + grep -E "Need to (install|upgrade) the python" >/dev/null 2>&1; then stdbuf -o0 printf "TEST: %-60s [PYLIB]\n" "${tdesc}" return $ksft_skip fi diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 912dc8c4908582..e4c24d5edf20cb 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -28,8 +28,10 @@ from pyroute2.netlink import nlmsg_atoms from pyroute2.netlink.exceptions import NetlinkError from pyroute2.netlink.generic import GenericNetlinkSocket + import pyroute2 + except ModuleNotFoundError: - print("Need to install the python pyroute2 package.") + print("Need to install the python pyroute2 package >= 0.6.") sys.exit(0) @@ -1998,6 +2000,12 @@ def main(argv): nlmsg_atoms.ovskey = ovskey nlmsg_atoms.ovsactions = ovsactions + # version check for pyroute2 + prverscheck = pyroute2.__version__.split(".") + if int(prverscheck[0]) == 0 and int(prverscheck[1]) < 6: + print("Need to upgrade the python pyroute2 package to >= 0.6.") + sys.exit(0) + parser = argparse.ArgumentParser() parser.add_argument( "-v", From af846afad5ca1c1a24d320adf9e48255e97db84e Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 11 Oct 2023 15:49:37 -0400 Subject: [PATCH 217/661] selftests: openvswitch: Catch cases where the tests are killed In case of fatal signal, or early abort at least cleanup the current test case. Fixes: 25f16c873fb1 ("selftests: add openvswitch selftest suite") Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- tools/testing/selftests/net/openvswitch/openvswitch.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 220c3356901ef1..2a0112be7ead58 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -3,6 +3,8 @@ # # OVS kernel module self tests +trap ovs_exit_sig EXIT TERM INT ERR + # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 From 76035fd12cb9046be00ffb9d4262b5b3277d5068 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 11 Oct 2023 15:49:38 -0400 Subject: [PATCH 218/661] selftests: openvswitch: Skip drop testing on older kernels Kernels that don't have support for openvswitch drop reasons also won't have the drop counter reasons, so we should skip the test completely. It previously wasn't possible to build a test case for this without polluting the datapath, so we introduce a mechanism to clear all the flows from a datapath allowing us to test for explicit drop actions, and then clear the flows to build the original test case. Fixes: 4242029164d6 ("selftests: openvswitch: add explicit drop testcase") Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- .../selftests/net/openvswitch/openvswitch.sh | 17 ++++++++++ .../selftests/net/openvswitch/ovs-dpctl.py | 34 +++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 2a0112be7ead58..f8499d4c87f3f7 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -144,6 +144,12 @@ ovs_add_flow () { return 0 } +ovs_del_flows () { + info "Deleting all flows from DP: sbx:$1 br:$2" + ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py del-flows "$2" + return 0 +} + ovs_drop_record_and_run () { local sbx=$1 shift @@ -200,6 +206,17 @@ test_drop_reason() { ip netns exec server ip addr add 172.31.110.20/24 dev s1 ip netns exec server ip link set s1 up + # Check if drop reasons can be sent + ovs_add_flow "test_drop_reason" dropreason \ + 'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(10)' 2>/dev/null + if [ $? == 1 ]; then + info "no support for drop reasons - skipping" + ovs_exit_sig + return $ksft_skip + fi + + ovs_del_flows "test_drop_reason" dropreason + # Allow ARP ovs_add_flow "test_drop_reason" dropreason \ 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index e4c24d5edf20cb..10b8f31548f840 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -1906,6 +1906,32 @@ def add_flow(self, dpifindex, flowmsg): raise ne return reply + def del_flows(self, dpifindex): + """ + Send a del message to the kernel that will drop all flows. + + dpifindex should be a valid datapath obtained by calling + into the OvsDatapath lookup + """ + + flowmsg = OvsFlow.ovs_flow_msg() + flowmsg["cmd"] = OVS_FLOW_CMD_DEL + flowmsg["version"] = OVS_DATAPATH_VERSION + flowmsg["reserved"] = 0 + flowmsg["dpifindex"] = dpifindex + + try: + reply = self.nlm_request( + flowmsg, + msg_type=self.prid, + msg_flags=NLM_F_REQUEST | NLM_F_ACK, + ) + reply = reply[0] + except NetlinkError as ne: + print(flowmsg) + raise ne + return reply + def dump(self, dpifindex, flowspec=None): """ Returns a list of messages containing flows. @@ -2068,6 +2094,9 @@ def main(argv): addflcmd.add_argument("flow", help="Flow specification") addflcmd.add_argument("acts", help="Flow actions") + delfscmd = subparsers.add_parser("del-flows") + delfscmd.add_argument("flsbr", help="Datapath name") + args = parser.parse_args() if args.verbose > 0: @@ -2151,6 +2180,11 @@ def main(argv): flow = OvsFlow.ovs_flow_msg() flow.parse(args.flow, args.acts, rep["dpifindex"]) ovsflow.add_flow(rep["dpifindex"], flow) + elif hasattr(args, "flsbr"): + rep = ovsdp.info(args.flsbr, 0) + if rep is None: + print("DP '%s' not found." % args.flsbr) + ovsflow.del_flows(rep["dpifindex"]) return 0 From 8eff0e062201e26739c74ac2355b7362622b7190 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 11 Oct 2023 15:49:39 -0400 Subject: [PATCH 219/661] selftests: openvswitch: Fix the ct_tuple for v4 The ct_tuple v4 data structure decode / encode routines were using the v6 IP address decode and relying on default encode. This could cause exceptions during encode / decode depending on how a ct4 tuple would appear in a netlink message. Caught during code review. Fixes: e52b07aa1a54 ("selftests: openvswitch: add flow dump support") Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- tools/testing/selftests/net/openvswitch/ovs-dpctl.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 10b8f31548f840..b97e621face958 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -1119,12 +1119,14 @@ class ovs_key_ct_tuple_ipv4(ovs_key_proto): "src", lambda x: str(ipaddress.IPv4Address(x)), int, + convert_ipv4, ), ( "dst", "dst", - lambda x: str(ipaddress.IPv6Address(x)), + lambda x: str(ipaddress.IPv4Address(x)), int, + convert_ipv4, ), ("tp_src", "tp_src", "%d", int), ("tp_dst", "tp_dst", "%d", int), From 52480e1f1a259c93d749ba3961af0bffedfe7a7a Mon Sep 17 00:00:00 2001 From: Puliang Lu Date: Mon, 16 Oct 2023 15:36:16 +0800 Subject: [PATCH 220/661] USB: serial: option: add Fibocom to DELL custom modem FM101R-GL Update the USB serial option driver support for the Fibocom FM101R-GL LTE modules as there are actually several different variants. - VID:PID 413C:8213, FM101R-GL are laptop M.2 cards (with MBIM interfaces for Linux) - VID:PID 413C:8215, FM101R-GL ESIM are laptop M.2 cards (with MBIM interface for Linux) 0x8213: mbim, tty 0x8215: mbim, tty T: Bus=04 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 2 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=413c ProdID=8213 Rev= 5.04 S: Manufacturer=Fibocom Wireless Inc. S: Product=Fibocom FM101-GL Module S: SerialNumber=a3b7cbf0 C:* #Ifs= 3 Cfg#= 1 Atr=a0 MxPwr=896mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=40 Driver=(none) E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms T: Bus=04 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 3 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=413c ProdID=8215 Rev= 5.04 S: Manufacturer=Fibocom Wireless Inc. S: Product=Fibocom FM101-GL Module S: SerialNumber=a3b7cbf0 C:* #Ifs= 3 Cfg#= 1 Atr=a0 MxPwr=896mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=40 Driver=(none) E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms Signed-off-by: Puliang Lu Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index c0908e6d49210b..45dcfaadaf98eb 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -203,6 +203,9 @@ static void option_instat_callback(struct urb *urb); #define DELL_PRODUCT_5829E_ESIM 0x81e4 #define DELL_PRODUCT_5829E 0x81e6 +#define DELL_PRODUCT_FM101R 0x8213 +#define DELL_PRODUCT_FM101R_ESIM 0x8215 + #define KYOCERA_VENDOR_ID 0x0c88 #define KYOCERA_PRODUCT_KPC650 0x17da #define KYOCERA_PRODUCT_KPC680 0x180a @@ -1108,6 +1111,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0) | RSVD(6) }, { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5829E_ESIM), .driver_info = RSVD(0) | RSVD(6) }, + { USB_DEVICE_INTERFACE_CLASS(DELL_VENDOR_ID, DELL_PRODUCT_FM101R, 0xff) }, + { USB_DEVICE_INTERFACE_CLASS(DELL_VENDOR_ID, DELL_PRODUCT_FM101R_ESIM, 0xff) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) }, /* ADU-E100, ADU-310 */ { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) }, From f6ca3fb6978f94d95ee79f95085fc22e71ca17cc Mon Sep 17 00:00:00 2001 From: Rouven Czerwinski Date: Fri, 22 Sep 2023 16:17:16 +0200 Subject: [PATCH 221/661] mtd: rawnand: Ensure the nand chip supports cached reads Both the JEDEC and ONFI specification say that read cache sequential support is an optional command. This means that we not only need to check whether the individual controller supports the command, we also need to check the parameter pages for both ONFI and JEDEC NAND flashes before enabling sequential cache reads. This fixes support for NAND flashes which don't support enabling cache reads, i.e. Samsung K9F4G08U0F or Toshiba TC58NVG0S3HTA00. Sequential cache reads are now only available for ONFI and JEDEC devices, if individual vendors implement this, it needs to be enabled per vendor. Tested on i.MX6Q with a Samsung NAND flash chip that doesn't support sequential reads. Fixes: 003fe4b9545b ("mtd: rawnand: Support for sequential cache reads") Cc: stable@vger.kernel.org Signed-off-by: Rouven Czerwinski Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230922141717.35977-1-r.czerwinski@pengutronix.de --- drivers/mtd/nand/raw/nand_base.c | 3 +++ drivers/mtd/nand/raw/nand_jedec.c | 3 +++ drivers/mtd/nand/raw/nand_onfi.c | 3 +++ include/linux/mtd/jedec.h | 3 +++ include/linux/mtd/onfi.h | 1 + include/linux/mtd/rawnand.h | 2 ++ 6 files changed, 15 insertions(+) diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index d4b55155aeae90..1fcac403cee60f 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -5110,6 +5110,9 @@ static void rawnand_check_cont_read_support(struct nand_chip *chip) { struct mtd_info *mtd = nand_to_mtd(chip); + if (!chip->parameters.supports_read_cache) + return; + if (chip->read_retries) return; diff --git a/drivers/mtd/nand/raw/nand_jedec.c b/drivers/mtd/nand/raw/nand_jedec.c index 836757717660b9..b3cc8f36052916 100644 --- a/drivers/mtd/nand/raw/nand_jedec.c +++ b/drivers/mtd/nand/raw/nand_jedec.c @@ -94,6 +94,9 @@ int nand_jedec_detect(struct nand_chip *chip) goto free_jedec_param_page; } + if (p->opt_cmd[0] & JEDEC_OPT_CMD_READ_CACHE) + chip->parameters.supports_read_cache = true; + memorg->pagesize = le32_to_cpu(p->byte_per_page); mtd->writesize = memorg->pagesize; diff --git a/drivers/mtd/nand/raw/nand_onfi.c b/drivers/mtd/nand/raw/nand_onfi.c index f15ef90aec8cd6..861975e44b552d 100644 --- a/drivers/mtd/nand/raw/nand_onfi.c +++ b/drivers/mtd/nand/raw/nand_onfi.c @@ -303,6 +303,9 @@ int nand_onfi_detect(struct nand_chip *chip) ONFI_FEATURE_ADDR_TIMING_MODE, 1); } + if (le16_to_cpu(p->opt_cmd) & ONFI_OPT_CMD_READ_CACHE) + chip->parameters.supports_read_cache = true; + onfi = kzalloc(sizeof(*onfi), GFP_KERNEL); if (!onfi) { ret = -ENOMEM; diff --git a/include/linux/mtd/jedec.h b/include/linux/mtd/jedec.h index 0b6b59f7cfbdcd..56047a4e54c9c2 100644 --- a/include/linux/mtd/jedec.h +++ b/include/linux/mtd/jedec.h @@ -21,6 +21,9 @@ struct jedec_ecc_info { /* JEDEC features */ #define JEDEC_FEATURE_16_BIT_BUS (1 << 0) +/* JEDEC Optional Commands */ +#define JEDEC_OPT_CMD_READ_CACHE BIT(1) + struct nand_jedec_params { /* rev info and features block */ /* 'J' 'E' 'S' 'D' */ diff --git a/include/linux/mtd/onfi.h b/include/linux/mtd/onfi.h index a7376f9beddfd6..55ab2e4d62f947 100644 --- a/include/linux/mtd/onfi.h +++ b/include/linux/mtd/onfi.h @@ -55,6 +55,7 @@ #define ONFI_SUBFEATURE_PARAM_LEN 4 /* ONFI optional commands SET/GET FEATURES supported? */ +#define ONFI_OPT_CMD_READ_CACHE BIT(1) #define ONFI_OPT_CMD_SET_GET_FEATURES BIT(2) struct nand_onfi_params { diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 90a141ba2a5acc..c29ace15a053a6 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -225,6 +225,7 @@ struct gpio_desc; * struct nand_parameters - NAND generic parameters from the parameter page * @model: Model name * @supports_set_get_features: The NAND chip supports setting/getting features + * @supports_read_cache: The NAND chip supports read cache operations * @set_feature_list: Bitmap of features that can be set * @get_feature_list: Bitmap of features that can be get * @onfi: ONFI specific parameters @@ -233,6 +234,7 @@ struct nand_parameters { /* Generic parameters */ const char *model; bool supports_set_get_features; + bool supports_read_cache; DECLARE_BITMAP(set_feature_list, ONFI_FEATURE_NUMBER); DECLARE_BITMAP(get_feature_list, ONFI_FEATURE_NUMBER); From e07744b43d3ad10b040f0ec464b6323ca96903d6 Mon Sep 17 00:00:00 2001 From: Liming Wu Date: Sun, 8 Oct 2023 11:17:33 +0800 Subject: [PATCH 222/661] tools/virtio: Add dma sync api for virtio test Fixes: 8bd2f71054bd ("virtio_ring: introduce dma sync api for virtqueue") also add dma sync api for virtio test. Signed-off-by: Liming Wu Message-Id: <20231008031734.1095-1-liming.wu@jaguarmicro.com> Signed-off-by: Michael S. Tsirkin --- tools/virtio/linux/dma-mapping.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h index 834a90bd3270e5..822ecaa8e4df26 100644 --- a/tools/virtio/linux/dma-mapping.h +++ b/tools/virtio/linux/dma-mapping.h @@ -24,11 +24,23 @@ enum dma_data_direction { #define dma_map_page(d, p, o, s, dir) (page_to_phys(p) + (o)) #define dma_map_single(d, p, s, dir) (virt_to_phys(p)) +#define dma_map_single_attrs(d, p, s, dir, a) (virt_to_phys(p)) #define dma_mapping_error(...) (0) #define dma_unmap_single(d, a, s, r) do { (void)(d); (void)(a); (void)(s); (void)(r); } while (0) #define dma_unmap_page(d, a, s, r) do { (void)(d); (void)(a); (void)(s); (void)(r); } while (0) +#define sg_dma_address(sg) (0) +#define dma_need_sync(v, a) (0) +#define dma_unmap_single_attrs(d, a, s, r, t) do { \ + (void)(d); (void)(a); (void)(s); (void)(r); (void)(t); \ +} while (0) +#define dma_sync_single_range_for_cpu(d, a, o, s, r) do { \ + (void)(d); (void)(a); (void)(o); (void)(s); (void)(r); \ +} while (0) +#define dma_sync_single_range_for_device(d, a, o, s, r) do { \ + (void)(d); (void)(a); (void)(o); (void)(s); (void)(r); \ +} while (0) #define dma_max_mapping_size(...) SIZE_MAX #endif From 63e8b94ad1840f02462633abdb363397f56bc642 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Thu, 21 Sep 2023 15:14:12 +0800 Subject: [PATCH 223/661] s390/cio: fix a memleak in css_alloc_subchannel When dma_set_coherent_mask() fails, sch->lock has not been freed, which is allocated in css_sch_create_locks(), leading to a memleak. Fixes: 4520a91a976e ("s390/cio: use dma helpers for setting masks") Signed-off-by: Dinghao Liu Message-Id: <20230921071412.13806-1-dinghao.liu@zju.edu.cn> Link: https://lore.kernel.org/linux-s390/bd38baa8-7b9d-4d89-9422-7e943d626d6e@linux.ibm.com/ Reviewed-by: Halil Pasic Reviewed-by: Peter Oberparleiter Signed-off-by: Vasily Gorbik --- drivers/s390/cio/css.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 3ef636935a547c..3ff46fc694f85e 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -233,17 +233,19 @@ struct subchannel *css_alloc_subchannel(struct subchannel_id schid, */ ret = dma_set_coherent_mask(&sch->dev, DMA_BIT_MASK(31)); if (ret) - goto err; + goto err_lock; /* * But we don't have such restrictions imposed on the stuff that * is handled by the streaming API. */ ret = dma_set_mask(&sch->dev, DMA_BIT_MASK(64)); if (ret) - goto err; + goto err_lock; return sch; +err_lock: + kfree(sch->lock); err: kfree(sch); return ERR_PTR(ret); From 327899674eef18f96644be87aa5510b7523fe4f6 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 12 Oct 2023 11:06:21 +0200 Subject: [PATCH 224/661] s390/kasan: handle DCSS mapping in memory holes When physical memory is defined under z/VM using DEF STOR CONFIG, there may be memory holes that are not hotpluggable memory. In such cases, DCSS mapping could be placed in one of these memory holes. Subsequently, attempting memory access to such DCSS mapping would result in a kasan failure because there is no shadow memory mapping for it. To maintain consistency with cases where DCSS mapping is positioned after the kernel identity mapping, which is then covered by kasan zero shadow mapping, handle the scenario above by populating zero shadow mapping for memory holes where DCSS mapping could potentially be placed. Reviewed-by: Heiko Carstens Reviewed-by: Gerald Schaefer Signed-off-by: Vasily Gorbik --- arch/s390/boot/vmem.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 01257ce3b89c00..442a74f113cbfd 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -57,6 +57,7 @@ static void kasan_populate_shadow(void) pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); + unsigned long memgap_start = 0; unsigned long untracked_end; unsigned long start, end; int i; @@ -101,8 +102,12 @@ static void kasan_populate_shadow(void) * +- shadow end ----+---------+- shadow end ---+ */ - for_each_physmem_usable_range(i, &start, &end) + for_each_physmem_usable_range(i, &start, &end) { kasan_populate(start, end, POPULATE_KASAN_MAP_SHADOW); + if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260) + kasan_populate(memgap_start, start, POPULATE_KASAN_ZERO_SHADOW); + memgap_start = end; + } if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { untracked_end = VMALLOC_START; /* shallowly populate kasan shadow for vmalloc and modules */ From 3b401e30c249849d803de6c332dad2a595a58658 Mon Sep 17 00:00:00 2001 From: Karolina Stolarek Date: Mon, 16 Oct 2023 14:15:25 +0200 Subject: [PATCH 225/661] drm/ttm: Reorder sys manager cleanup step MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the current cleanup flow, we could trigger a NULL pointer dereference if there is a delayed destruction of a BO with a system resource that gets executed on drain_workqueue() call, as we attempt to free a resource using an already released resource manager. Remove the device from the device list and drain its workqueue before releasing the system domain manager in ttm_device_fini(). Signed-off-by: Karolina Stolarek Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20231016121525.2237838-1-karolina.stolarek@intel.com Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 7726a72befc544..d48b39132b3242 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -232,10 +232,6 @@ void ttm_device_fini(struct ttm_device *bdev) struct ttm_resource_manager *man; unsigned i; - man = ttm_manager_type(bdev, TTM_PL_SYSTEM); - ttm_resource_manager_set_used(man, false); - ttm_set_driver_manager(bdev, TTM_PL_SYSTEM, NULL); - mutex_lock(&ttm_global_mutex); list_del(&bdev->device_list); mutex_unlock(&ttm_global_mutex); @@ -243,6 +239,10 @@ void ttm_device_fini(struct ttm_device *bdev) drain_workqueue(bdev->wq); destroy_workqueue(bdev->wq); + man = ttm_manager_type(bdev, TTM_PL_SYSTEM); + ttm_resource_manager_set_used(man, false); + ttm_set_driver_manager(bdev, TTM_PL_SYSTEM, NULL); + spin_lock(&bdev->lru_lock); for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) if (list_empty(&man->lru[0])) From c8befdc411e5fd1bf95a13e8744c8ca79b412bee Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 13 Oct 2023 16:57:05 +0200 Subject: [PATCH 226/661] pinctrl: qcom: lpass-lpi: fix concurrent register updates The Qualcomm LPASS LPI pin controller driver uses one lock for guarding Read-Modify-Write code for slew rate registers. However the pin configuration and muxing registers have exactly the same RMW code but are not protected. Pin controller framework does not provide locking here, thus it is possible to trigger simultaneous change of pin configuration registers resulting in non-atomic changes. Protect from concurrent access by re-using the same lock used to cover the slew rate register. Using the same lock instead of adding second one will make more sense, once we add support for newer Qualcomm SoC, where slew rate is configured in the same register as pin configuration/muxing. Fixes: 6e261d1090d6 ("pinctrl: qcom: Add sm8250 lpass lpi pinctrl driver") Cc: stable@vger.kernel.org Reviewed-by: Linus Walleij Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231013145705.219954-1-krzysztof.kozlowski@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-lpass-lpi.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c b/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c index e5a418026ba339..0b2839d27fd671 100644 --- a/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c +++ b/drivers/pinctrl/qcom/pinctrl-lpass-lpi.c @@ -32,7 +32,8 @@ struct lpi_pinctrl { char __iomem *tlmm_base; char __iomem *slew_base; struct clk_bulk_data clks[MAX_LPI_NUM_CLKS]; - struct mutex slew_access_lock; + /* Protects from concurrent register updates */ + struct mutex lock; DECLARE_BITMAP(ever_gpio, MAX_NR_GPIO); const struct lpi_pinctrl_variant_data *data; }; @@ -103,6 +104,7 @@ static int lpi_gpio_set_mux(struct pinctrl_dev *pctldev, unsigned int function, if (WARN_ON(i == g->nfuncs)) return -EINVAL; + mutex_lock(&pctrl->lock); val = lpi_gpio_read(pctrl, pin, LPI_GPIO_CFG_REG); /* @@ -128,6 +130,7 @@ static int lpi_gpio_set_mux(struct pinctrl_dev *pctldev, unsigned int function, u32p_replace_bits(&val, i, LPI_GPIO_FUNCTION_MASK); lpi_gpio_write(pctrl, pin, LPI_GPIO_CFG_REG, val); + mutex_unlock(&pctrl->lock); return 0; } @@ -233,14 +236,14 @@ static int lpi_config_set(struct pinctrl_dev *pctldev, unsigned int group, if (slew_offset == LPI_NO_SLEW) break; - mutex_lock(&pctrl->slew_access_lock); + mutex_lock(&pctrl->lock); sval = ioread32(pctrl->slew_base + LPI_SLEW_RATE_CTL_REG); sval &= ~(LPI_SLEW_RATE_MASK << slew_offset); sval |= arg << slew_offset; iowrite32(sval, pctrl->slew_base + LPI_SLEW_RATE_CTL_REG); - mutex_unlock(&pctrl->slew_access_lock); + mutex_unlock(&pctrl->lock); break; default: return -EINVAL; @@ -256,6 +259,7 @@ static int lpi_config_set(struct pinctrl_dev *pctldev, unsigned int group, lpi_gpio_write(pctrl, group, LPI_GPIO_VALUE_REG, val); } + mutex_lock(&pctrl->lock); val = lpi_gpio_read(pctrl, group, LPI_GPIO_CFG_REG); u32p_replace_bits(&val, pullup, LPI_GPIO_PULL_MASK); @@ -264,6 +268,7 @@ static int lpi_config_set(struct pinctrl_dev *pctldev, unsigned int group, u32p_replace_bits(&val, output_enabled, LPI_GPIO_OE_MASK); lpi_gpio_write(pctrl, group, LPI_GPIO_CFG_REG, val); + mutex_unlock(&pctrl->lock); return 0; } @@ -461,7 +466,7 @@ int lpi_pinctrl_probe(struct platform_device *pdev) pctrl->chip.label = dev_name(dev); pctrl->chip.can_sleep = false; - mutex_init(&pctrl->slew_access_lock); + mutex_init(&pctrl->lock); pctrl->ctrl = devm_pinctrl_register(dev, &pctrl->desc, pctrl); if (IS_ERR(pctrl->ctrl)) { @@ -483,7 +488,7 @@ int lpi_pinctrl_probe(struct platform_device *pdev) return 0; err_pinctrl: - mutex_destroy(&pctrl->slew_access_lock); + mutex_destroy(&pctrl->lock); clk_bulk_disable_unprepare(MAX_LPI_NUM_CLKS, pctrl->clks); return ret; @@ -495,7 +500,7 @@ int lpi_pinctrl_remove(struct platform_device *pdev) struct lpi_pinctrl *pctrl = platform_get_drvdata(pdev); int i; - mutex_destroy(&pctrl->slew_access_lock); + mutex_destroy(&pctrl->lock); clk_bulk_disable_unprepare(MAX_LPI_NUM_CLKS, pctrl->clks); for (i = 0; i < pctrl->data->npins; i++) From 88630e91f12677848c0c4a5790ec0d691f8859fa Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Thu, 12 Oct 2023 14:49:27 -0400 Subject: [PATCH 227/661] drm/edid: add 8 bpc quirk to the BenQ GW2765 The BenQ GW2765 reports that it supports higher (> 8) bpc modes, but when trying to set them we end up with a black screen. So, limit it to 8 bpc modes. Cc: stable@vger.kernel.org # 6.5+ Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2610 Reviewed-by: Harry Wentland Signed-off-by: Hamza Mahfooz Link: https://patchwork.freedesktop.org/patch/msgid/20231012184927.133137-1-hamza.mahfooz@amd.com --- drivers/gpu/drm/drm_edid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 340da8257b51b4..4b71040ae5be5c 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -123,6 +123,9 @@ static const struct edid_quirk { /* AEO model 0 reports 8 bpc, but is a 6 bpc panel */ EDID_QUIRK('A', 'E', 'O', 0, EDID_QUIRK_FORCE_6BPC), + /* BenQ GW2765 */ + EDID_QUIRK('B', 'N', 'Q', 0x78d6, EDID_QUIRK_FORCE_8BPC), + /* BOE model on HP Pavilion 15-n233sl reports 8 bpc, but is a 6 bpc panel */ EDID_QUIRK('B', 'O', 'E', 0x78b, EDID_QUIRK_FORCE_6BPC), From dcc583c225e659d5da34b4ad83914fd6b51e3dbf Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 4 Oct 2023 16:32:24 +0800 Subject: [PATCH 228/661] drm/mediatek: Correctly free sg_table in gem prime vmap The MediaTek DRM driver implements GEM PRIME vmap by fetching the sg_table for the object, iterating through the pages, and then vmapping them. In essence, unlike the GEM DMA helpers which vmap when the object is first created or imported, the MediaTek version does it on request. Unfortunately, the code never correctly frees the sg_table contents. This results in a kernel memory leak. On a Hayato device with a text console on the internal display, this results in the system running out of memory in a few days from all the console screen cursor updates. Add sg_free_table() to correctly free the contents of the sg_table. This was missing despite explicitly required by mtk_gem_prime_get_sg_table(). Also move the "out" shortcut label to after the kfree() call for the sg_table. Having sg_free_table() together with kfree() makes more sense. The shortcut is only used when the object already has a kernel address, in which case the pointer is NULL and kfree() does nothing. Hence this change causes no functional change. Fixes: 3df64d7b0a4f ("drm/mediatek: Implement gem prime vmap/vunmap function") Cc: Signed-off-by: Chen-Yu Tsai Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20231004083226.1940055-1-wenst@chromium.org/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_gem.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c index 9f364df52478d8..0e0a41b2f57f05 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c @@ -239,6 +239,7 @@ int mtk_drm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map) npages = obj->size >> PAGE_SHIFT; mtk_gem->pages = kcalloc(npages, sizeof(*mtk_gem->pages), GFP_KERNEL); if (!mtk_gem->pages) { + sg_free_table(sgt); kfree(sgt); return -ENOMEM; } @@ -248,12 +249,15 @@ int mtk_drm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map) mtk_gem->kvaddr = vmap(mtk_gem->pages, npages, VM_MAP, pgprot_writecombine(PAGE_KERNEL)); if (!mtk_gem->kvaddr) { + sg_free_table(sgt); kfree(sgt); kfree(mtk_gem->pages); return -ENOMEM; } -out: + sg_free_table(sgt); kfree(sgt); + +out: iosys_map_set_vaddr(map, mtk_gem->kvaddr); return 0; From 1c29d80134ac116e0196c7bad58a2121381b679c Mon Sep 17 00:00:00 2001 From: Ekansh Gupta Date: Fri, 13 Oct 2023 13:20:04 +0100 Subject: [PATCH 229/661] misc: fastrpc: Reset metadata buffer to avoid incorrect free Metadata buffer is allocated during get_args for any remote call. This buffer carries buffers, fdlists and other payload information for the call. If the buffer is not reset, put_args might find some garbage FDs in the fdlist which might have an existing mapping in the list. This could result in improper freeing of FD map when DSP might still be using the buffer. Added change to reset the metadata buffer after allocation. Fixes: 8f6c1d8c4f0c ("misc: fastrpc: Add fdlist implementation") Cc: stable Signed-off-by: Ekansh Gupta Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20231013122007.174464-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index a66b7c111cd519..fb921975b56d5c 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -958,6 +958,7 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) if (err) return err; + memset(ctx->buf->virt, 0, pkt_size); rpra = ctx->buf->virt; list = fastrpc_invoke_buf_start(rpra, ctx->nscalars); pages = fastrpc_phy_page_start(list, ctx->nscalars); From 206484303892a2a36c0c3414030ddfef658a4e70 Mon Sep 17 00:00:00 2001 From: Ekansh Gupta Date: Fri, 13 Oct 2023 13:20:05 +0100 Subject: [PATCH 230/661] misc: fastrpc: Free DMA handles for RPC calls with no arguments The FDs for DMA handles to be freed is updated in fdlist by DSP over a remote call. This holds true even for remote calls with no arguments. To handle this, get_args and put_args are needed to be called for remote calls with no arguments also as fdlist is allocated in get_args and FDs updated in fdlist is freed in put_args. Fixes: 8f6c1d8c4f0c ("misc: fastrpc: Add fdlist implementation") Cc: stable Signed-off-by: Ekansh Gupta Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20231013122007.174464-3-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index fb921975b56d5c..a52701c1b018a6 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1091,6 +1091,7 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx, } } + /* Clean up fdlist which is updated by DSP */ for (i = 0; i < FASTRPC_MAX_FDLIST; i++) { if (!fdlist[i]) break; @@ -1157,11 +1158,9 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, if (IS_ERR(ctx)) return PTR_ERR(ctx); - if (ctx->nscalars) { - err = fastrpc_get_args(kernel, ctx); - if (err) - goto bail; - } + err = fastrpc_get_args(kernel, ctx); + if (err) + goto bail; /* make sure that all CPU memory writes are seen by DSP */ dma_wmb(); @@ -1185,14 +1184,12 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, if (err) goto bail; - if (ctx->nscalars) { - /* make sure that all memory writes by DSP are seen by CPU */ - dma_rmb(); - /* populate all the output buffers with results */ - err = fastrpc_put_args(ctx, kernel); - if (err) - goto bail; - } + /* make sure that all memory writes by DSP are seen by CPU */ + dma_rmb(); + /* populate all the output buffers with results */ + err = fastrpc_put_args(ctx, kernel); + if (err) + goto bail; bail: if (err != -ERESTARTSYS && err != -ETIMEDOUT) { From 1c8093591d1e372d700fe65423e7315a8ecf721b Mon Sep 17 00:00:00 2001 From: Ekansh Gupta Date: Fri, 13 Oct 2023 13:20:06 +0100 Subject: [PATCH 231/661] misc: fastrpc: Clean buffers on remote invocation failures With current design, buffers and dma handles are not freed in case of remote invocation failures returned from DSP. This could result in buffer leakings and dma handle pointing to wrong memory in the fastrpc kernel. Adding changes to clean buffers and dma handles even when remote invocation to DSP returns failures. Fixes: c68cfb718c8f ("misc: fastrpc: Add support for context Invoke method") Cc: stable Signed-off-by: Ekansh Gupta Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20231013122007.174464-4-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index a52701c1b018a6..3cdc58488db110 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1176,11 +1176,6 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, err = wait_for_completion_interruptible(&ctx->work); } - if (err) - goto bail; - - /* Check the response from remote dsp */ - err = ctx->retval; if (err) goto bail; @@ -1191,6 +1186,11 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, if (err) goto bail; + /* Check the response from remote dsp */ + err = ctx->retval; + if (err) + goto bail; + bail: if (err != -ERESTARTSYS && err != -ETIMEDOUT) { /* We are done with this compute context */ From 509143385db364c67556a914bef6c9a42fd2c74c Mon Sep 17 00:00:00 2001 From: Ekansh Gupta Date: Fri, 13 Oct 2023 13:20:07 +0100 Subject: [PATCH 232/661] misc: fastrpc: Unmap only if buffer is unmapped from DSP For unmapping any buffer from kernel, it should first be unmapped from DSP. In case unmap from DSP request fails, the map should not be removed from kernel as it might lead to SMMU faults and other memory issues. Fixes: 5c1b97c7d7b7 ("misc: fastrpc: add support for FASTRPC_IOCTL_MEM_MAP/UNMAP") Cc: stable Signed-off-by: Ekansh Gupta Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20231013122007.174464-5-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 3cdc58488db110..1c6c62a7f7f553 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1981,11 +1981,13 @@ static int fastrpc_req_mem_unmap_impl(struct fastrpc_user *fl, struct fastrpc_me sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_MEM_UNMAP, 1, 0); err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc, &args[0]); - fastrpc_map_put(map); - if (err) + if (err) { dev_err(dev, "unmmap\tpt fd = %d, 0x%09llx error\n", map->fd, map->raddr); + return err; + } + fastrpc_map_put(map); - return err; + return 0; } static int fastrpc_req_mem_unmap(struct fastrpc_user *fl, char __user *argp) From 414a98abbefd82d591f4e2d1efd2917bcd3b6f6d Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 13 Oct 2023 13:49:02 +0100 Subject: [PATCH 233/661] nvmem: imx: correct nregs for i.MX6SLL The nregs for i.MX6SLL should be 80 per fuse map, correct it. Fixes: 6da27821a6f5 ("nvmem: imx-ocotp: add support for imx6sll") Cc: Stable@vger.kernel.org Signed-off-by: Peng Fan Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20231013124904.175782-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/imx-ocotp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvmem/imx-ocotp.c b/drivers/nvmem/imx-ocotp.c index a223d9537f224d..7302f25b14a1b3 100644 --- a/drivers/nvmem/imx-ocotp.c +++ b/drivers/nvmem/imx-ocotp.c @@ -498,7 +498,7 @@ static const struct ocotp_params imx6sl_params = { }; static const struct ocotp_params imx6sll_params = { - .nregs = 128, + .nregs = 80, .bank_address_words = 0, .set_timing = imx_ocotp_set_imx6_timing, .ctrl = IMX_OCOTP_BM_CTRL_DEFAULT, From 7d6e10f5d254681983b53d979422c8de3fadbefb Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 13 Oct 2023 13:49:03 +0100 Subject: [PATCH 234/661] nvmem: imx: correct nregs for i.MX6UL The nregs for i.MX6UL should be 144 per fuse map, correct it. Fixes: 4aa2b4802046 ("nvmem: octop: Add support for imx6ul") Cc: Stable@vger.kernel.org Signed-off-by: Peng Fan Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20231013124904.175782-3-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/imx-ocotp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvmem/imx-ocotp.c b/drivers/nvmem/imx-ocotp.c index 7302f25b14a1b3..921afe114a2c6e 100644 --- a/drivers/nvmem/imx-ocotp.c +++ b/drivers/nvmem/imx-ocotp.c @@ -512,7 +512,7 @@ static const struct ocotp_params imx6sx_params = { }; static const struct ocotp_params imx6ul_params = { - .nregs = 128, + .nregs = 144, .bank_address_words = 0, .set_timing = imx_ocotp_set_imx6_timing, .ctrl = IMX_OCOTP_BM_CTRL_DEFAULT, From 2382c1b044231fd49eaf9aa82bc7113fc55487b8 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 13 Oct 2023 13:49:04 +0100 Subject: [PATCH 235/661] nvmem: imx: correct nregs for i.MX6ULL The nregs for i.MX6ULL should be 80 per fuse map, correct it. Fixes: ffbc34bf0e9c ("nvmem: imx-ocotp: Implement i.MX6ULL/ULZ support") Cc: Stable@vger.kernel.org Signed-off-by: Peng Fan Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20231013124904.175782-4-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/imx-ocotp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvmem/imx-ocotp.c b/drivers/nvmem/imx-ocotp.c index 921afe114a2c6e..e8b6f194925dfc 100644 --- a/drivers/nvmem/imx-ocotp.c +++ b/drivers/nvmem/imx-ocotp.c @@ -519,7 +519,7 @@ static const struct ocotp_params imx6ul_params = { }; static const struct ocotp_params imx6ull_params = { - .nregs = 64, + .nregs = 80, .bank_address_words = 0, .set_timing = imx_ocotp_set_imx6_timing, .ctrl = IMX_OCOTP_BM_CTRL_DEFAULT, From 3975e72b164dc8347a28dd0d5f11b346af534635 Mon Sep 17 00:00:00 2001 From: Christopher Obbard Date: Fri, 13 Oct 2023 12:47:26 +0100 Subject: [PATCH 236/661] arm64: dts: rockchip: Add i2s0-2ch-bus-bclk-off pins to RK3399 Commit 0efaf8078393 ("arm64: dts: rockchip: add i2s0-2ch-bus pins on rk3399") introduced a pinctl for i2s0 in two-channel mode. Commit 91419ae0420f ("arm64: dts: rockchip: use BCLK to GPIO switch on rk3399") modified i2s0 to switch the corresponding pins off when idle. Although an idle pinctrl node was added for i2s0 in 8-channel mode, a similar idle pinctrl node for i2s0 in 2-channel mode was not added. Add it. Fixes: 91419ae0420f ("arm64: dts: rockchip: use BCLK to GPIO switch on rk3399") Signed-off-by: Christopher Obbard Link: https://lore.kernel.org/r/20231013114737.494410-2-chris.obbard@collabora.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 9da0b6d77c8d2d..5bc2d4faeea6df 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -2457,6 +2457,16 @@ <4 RK_PA0 1 &pcfg_pull_none>; }; + i2s0_2ch_bus_bclk_off: i2s0-2ch-bus-bclk-off { + rockchip,pins = + <3 RK_PD0 RK_FUNC_GPIO &pcfg_pull_none>, + <3 RK_PD1 1 &pcfg_pull_none>, + <3 RK_PD2 1 &pcfg_pull_none>, + <3 RK_PD3 1 &pcfg_pull_none>, + <3 RK_PD7 1 &pcfg_pull_none>, + <4 RK_PA0 1 &pcfg_pull_none>; + }; + i2s0_8ch_bus: i2s0-8ch-bus { rockchip,pins = <3 RK_PD0 1 &pcfg_pull_none>, From 8cd79b729e746cb167f1563d015a93fc0a079899 Mon Sep 17 00:00:00 2001 From: Christopher Obbard Date: Fri, 13 Oct 2023 12:47:27 +0100 Subject: [PATCH 237/661] arm64: dts: rockchip: Fix i2s0 pin conflict on ROCK Pi 4 boards Commit 91419ae0420f ("arm64: dts: rockchip: use BCLK to GPIO switch on rk3399") modified i2s0 to switch the corresponding pins off when idle. For the ROCK Pi 4 boards, this means that i2s0 has the following pinctrl setting: pinctrl-names = "bclk_on", "bclk_off"; pinctrl-0 = <&i2s0_2ch_bus>; pinctrl-1 = <&i2s0_8ch_bus_bclk_off>; Due to this change, i2s0 fails to probe on my Radxa ROCK 4SE and ROCK Pi 4B boards: rockchip-pinctrl pinctrl: pin gpio3-29 already requested by leds; cannot claim for ff880000.i2s rockchip-pinctrl pinctrl: pin-125 (ff880000.i2s) status -22 rockchip-pinctrl pinctrl: could not request pin 125 (gpio3-29) from group i2s0-8ch-bus-bclk-off on device rockchip-pinctrl rockchip-i2s ff880000.i2s: Error applying setting, reverse things back rockchip-i2s ff880000.i2s: bclk disable failed -22 A pin requested for i2s0_8ch_bus_bclk_off has already been requested by user_led2, so whichever driver probes first will have the pin allocated. The hardware uses 2-channel i2s so fix this error by setting pinctl-1 to i2s0_2ch_bus_bclk_off which doesn't contain the pin allocated to user_led2. I checked the schematics for all Radxa boards based on ROCK Pi 4 and this change is compatible with all boards. Fixes: 91419ae0420f ("arm64: dts: rockchip: use BCLK to GPIO switch on rk3399") Signed-off-by: Christopher Obbard Link: https://lore.kernel.org/r/20231013114737.494410-3-chris.obbard@collabora.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi index 7dccbe8a939307..f2279aa6ca9e12 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi @@ -492,6 +492,7 @@ &i2s0 { pinctrl-0 = <&i2s0_2ch_bus>; + pinctrl-1 = <&i2s0_2ch_bus_bclk_off>; rockchip,capture-channels = <2>; rockchip,playback-channels = <2>; status = "okay"; From dc608db793731426938baa2f0e75a4a3cce5f5cf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 16 Oct 2023 14:19:52 +0300 Subject: [PATCH 238/661] fbdev: omapfb: fix some error codes Return negative -ENXIO instead of positive ENXIO. Signed-off-by: Dan Carpenter Signed-off-by: Helge Deller --- drivers/video/fbdev/omap/omapfb_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c index f28cb90947a341..42c96f1cfc93c4 100644 --- a/drivers/video/fbdev/omap/omapfb_main.c +++ b/drivers/video/fbdev/omap/omapfb_main.c @@ -1645,13 +1645,13 @@ static int omapfb_do_probe(struct platform_device *pdev, } fbdev->int_irq = platform_get_irq(pdev, 0); if (fbdev->int_irq < 0) { - r = ENXIO; + r = -ENXIO; goto cleanup; } fbdev->ext_irq = platform_get_irq(pdev, 1); if (fbdev->ext_irq < 0) { - r = ENXIO; + r = -ENXIO; goto cleanup; } From e638d3710f0e2483ce01fbc113da83ba3639489c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 16 Oct 2023 22:04:40 +0200 Subject: [PATCH 239/661] fbdev: sa1100fb: mark sa1100fb_init() static This is a global function that is only referenced as an initcall. This causes a warning: drivers/video/fbdev/sa1100fb.c:1218:12: error: no previous prototype for 'sa1100fb_init' [-Werror=missing-prototypes] Make it static instead. Signed-off-by: Arnd Bergmann Signed-off-by: Helge Deller --- drivers/video/fbdev/sa1100fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/sa1100fb.c b/drivers/video/fbdev/sa1100fb.c index 3d76ce11148826..cf0f706762b49b 100644 --- a/drivers/video/fbdev/sa1100fb.c +++ b/drivers/video/fbdev/sa1100fb.c @@ -1214,7 +1214,7 @@ static struct platform_driver sa1100fb_driver = { }, }; -int __init sa1100fb_init(void) +static int __init sa1100fb_init(void) { if (fb_get_options("sa1100fb", NULL)) return -ENODEV; From 0c37bffaaebe1733433b480d612282169632a31a Mon Sep 17 00:00:00 2001 From: Jorge Maidana Date: Fri, 6 Oct 2023 17:43:46 -0300 Subject: [PATCH 240/661] fbdev: uvesafb: Remove uvesafb_exec() prototype from include/video/uvesafb.h uvesafb_exec() is a static function defined and called only in drivers/video/fbdev/uvesafb.c, remove the prototype from include/video/uvesafb.h. Fixes the warning: ./include/video/uvesafb.h:112:12: warning: 'uvesafb_exec' declared 'static' but never defined [-Wunused-function] when including '