From 3af56fc07fa3c2d8170d9271762ab6d3ba115779 Mon Sep 17 00:00:00 2001 From: Ewan Hai Date: Mon, 14 Apr 2025 03:53:42 -0400 Subject: [PATCH 001/136] target/i386: Fix model number of Zhaoxin YongFeng vCPU template The model number was mistakenly set to 0x0b (11) in commit ff04bc1ac4. The correct value is 0x5b. This mistake occurred because the extended model bits in cpuid[eax=0x1].eax were overlooked, and only the base model was used. Using the wrong model number can affect guest behavior. One known issue is that vPMU (which relies on the model number) may fail to operate correctly. This patch corrects the model field by introducing a new vCPU version. Fixes: ff04bc1ac4 ("target/i386: Introduce Zhaoxin Yongfeng CPU model") Signed-off-by: Ewan Hai Link: https://lore.kernel.org/r/20250414075342.411626-1-ewanhai-oc@zhaoxin.com Signed-off-by: Paolo Bonzini (cherry picked from commit 280712b78781c43511d6286d40f9a518a4de25ff) Signed-off-by: Michael Tokarev --- target/i386/cpu.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 1b64ceaaba..3fb1ec62da 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5621,6 +5621,18 @@ static const X86CPUDefinition builtin_x86_defs[] = { .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, .xlevel = 0x80000008, .model_id = "Zhaoxin YongFeng Processor", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .note = "with the correct model number", + .props = (PropValue[]) { + { "model", "0x5b" }, + { /* end of list */ } + } + }, + { /* end of list */ } + } }, }; From 86ffc25d3590bedac3fe00d6e2a5613ab89a5d68 Mon Sep 17 00:00:00 2001 From: "Maciej S. 
Szmigiero" Date: Thu, 27 Mar 2025 19:24:16 +0100 Subject: [PATCH 002/136] target/i386: Reset parked vCPUs together with the online ones Commit 3f2a05b31ee9 ("target/i386: Reset TSCs of parked vCPUs too on VM reset") introduced a way to reset TSCs of parked vCPUs during VM reset to prevent them from getting desynchronized with the online vCPUs and therefore causing the KVM PV clock to lose PVCLOCK_TSC_STABLE_BIT. The way this was done was by registering a parked vCPU-specific QEMU reset callback via qemu_register_reset(). However, it turns out that on particularly device-rich VMs QEMU reset callbacks can take a long time to execute (which isn't surprising, considering that they involve resetting all of the VM's devices). In particular, their total runtime can exceed the 1-second TSC synchronization window introduced in KVM commit 5d3cb0f6a8e3 ("KVM: Improve TSC offset matching"). Since the TSCs of online vCPUs are only reset from the "synchronize_post_reset" AccelOps handler (which runs after all qemu_register_reset() handlers), this essentially makes that fix ineffective on these VMs. The easiest way to guarantee that these parked vCPUs are reset at the same time as the online ones (regardless of how long it takes for VM devices to reset) is to piggyback on the post-reset vCPU synchronization handler for one of the online vCPUs - as there is no generic post-reset AccelOps handler that isn't per-vCPU. The first online vCPU was selected for that since it is easily available under the "first_cpu" define. This does not create an ordering issue since the order of vCPU TSC resets does not matter. Fixes: 3f2a05b31ee9 ("target/i386: Reset TSCs of parked vCPUs too on VM reset") Signed-off-by: Maciej S. 
Szmigiero Link: https://lore.kernel.org/r/e8b85a5915f79aa177ca49eccf0e9b534470c1cd.1743099810.git.maciej.szmigiero@oracle.com Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini (cherry picked from commit f6b5f71f04529d3f56b35f91badac9f5e7e225ca) Signed-off-by: Michael Tokarev --- accel/kvm/kvm-all.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index f89568bfa3..951e8214e0 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -437,9 +437,8 @@ int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id) return kvm_fd; } -static void kvm_reset_parked_vcpus(void *param) +static void kvm_reset_parked_vcpus(KVMState *s) { - KVMState *s = param; struct KVMParkedVcpu *cpu; QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) { @@ -2738,7 +2737,6 @@ static int kvm_init(MachineState *ms) } qemu_register_reset(kvm_unpoison_all, NULL); - qemu_register_reset(kvm_reset_parked_vcpus, s); if (s->kernel_irqchip_allowed) { kvm_irqchip_create(s); @@ -2908,6 +2906,10 @@ static void do_kvm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg void kvm_cpu_synchronize_post_reset(CPUState *cpu) { run_on_cpu(cpu, do_kvm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL); + + if (cpu == first_cpu) { + kvm_reset_parked_vcpus(kvm_state); + } } static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) From 00a5dc28994e96f71a4aa7ad4273af9581f4f289 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 3 Apr 2025 21:39:54 +0200 Subject: [PATCH 003/136] target/i386/hvf: fix lflags_to_rflags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clear the flags before adding in the ones computed from lflags. 
Cc: Wei Liu Cc: qemu-stable@nongnu.org Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini (cherry picked from commit 94a159f3dc737d00749cc930adaec112abe07b3c) Signed-off-by: Michael Tokarev --- target/i386/hvf/x86_flags.c | 1 + 1 file changed, 1 insertion(+) diff --git a/target/i386/hvf/x86_flags.c b/target/i386/hvf/x86_flags.c index 03d6de5efc..fedc70a1b8 100644 --- a/target/i386/hvf/x86_flags.c +++ b/target/i386/hvf/x86_flags.c @@ -293,6 +293,7 @@ void set_SF(CPUX86State *env, bool val) void lflags_to_rflags(CPUX86State *env) { + env->eflags &= ~(CC_C|CC_P|CC_A|CC_Z|CC_S|CC_O); env->eflags |= get_CF(env) ? CC_C : 0; env->eflags |= get_PF(env) ? CC_P : 0; env->eflags |= get_AF(env) ? CC_A : 0; From 2da497fd4fc0a5e09432443c6470c6e673e025d1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 23 Mar 2025 10:16:23 -0700 Subject: [PATCH 004/136] target/avr: Improve decode of LDS, STS The comment about not being able to define a field with zero bits is out of date since 94597b6146f3 ("decodetree: Allow !function with no input bits"). This fixes the missing load of imm in the disassembler. Cc: qemu-stable@nongnu.org Fixes: 9d8caa67a24 ("target/avr: Add support for disassembling via option '-d in_asm'") Reviewed-by: Pierrick Bouvier Signed-off-by: Richard Henderson (cherry picked from commit 6b661b7ed7cd02c54a78426d5eb7dd8543b030ed) Signed-off-by: Michael Tokarev --- target/avr/insn.decode | 7 ++----- target/avr/translate.c | 2 -- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/target/avr/insn.decode b/target/avr/insn.decode index 482c23ad0c..cc302249db 100644 --- a/target/avr/insn.decode +++ b/target/avr/insn.decode @@ -118,11 +118,8 @@ BRBC 1111 01 ....... ... @op_bit_imm @io_rd_imm .... . .. ..... .... &rd_imm rd=%rd imm=%io_imm @ldst_d .. . . .. . rd:5 . ... &rd_imm imm=%ldst_d_imm -# The 16-bit immediate is completely in the next word. 
-# Fields cannot be defined with no bits, so we cannot play -# the same trick and append to a zero-bit value. -# Defer reading the immediate until trans_{LDS,STS}. -@ldst_s .... ... rd:5 .... imm=0 +%ldst_imm !function=next_word +@ldst_s .... ... rd:5 .... imm=%ldst_imm MOV 0010 11 . ..... .... @op_rd_rr MOVW 0000 0001 .... .... &rd_rr rd=%rd_d rr=%rr_d diff --git a/target/avr/translate.c b/target/avr/translate.c index 4ab71d8138..e7f8ced9b3 100644 --- a/target/avr/translate.c +++ b/target/avr/translate.c @@ -1578,7 +1578,6 @@ static bool trans_LDS(DisasContext *ctx, arg_LDS *a) TCGv Rd = cpu_r[a->rd]; TCGv addr = tcg_temp_new_i32(); TCGv H = cpu_rampD; - a->imm = next_word(ctx); tcg_gen_mov_tl(addr, H); /* addr = H:M:L */ tcg_gen_shli_tl(addr, addr, 16); @@ -1783,7 +1782,6 @@ static bool trans_STS(DisasContext *ctx, arg_STS *a) TCGv Rd = cpu_r[a->rd]; TCGv addr = tcg_temp_new_i32(); TCGv H = cpu_rampD; - a->imm = next_word(ctx); tcg_gen_mov_tl(addr, H); /* addr = H:M:L */ tcg_gen_shli_tl(addr, addr, 16); From 94da90b8c7fa949ae3f61c254ee90b04889c016e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 22 Apr 2025 10:32:31 +0200 Subject: [PATCH 005/136] hw/core: Get default_cpu_type calling machine_class_default_cpu_type() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 62b4a227a33 the default cpu type can come from the valid_cpu_types[] array. Call the machine_class_default_cpu_type() instead of accessing MachineClass::default_cpu_type field. 
Cc: qemu-stable@nongnu.org Fixes: 62b4a227a33 ("hw/core: Add machine_class_default_cpu_type()") Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Reviewed-by: Pierrick Bouvier Reviewed-by: Zhao Liu Message-Id: <20250422084114.39499-1-philmd@linaro.org> (cherry picked from commit d5f241834be1b323ea697a469ff0f1335a1823fe) Signed-off-by: Michael Tokarev --- hw/core/machine-qmp-cmds.c | 5 +++-- target/ppc/cpu_init.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c index 3130c5cd45..1bc21b84a4 100644 --- a/hw/core/machine-qmp-cmds.c +++ b/hw/core/machine-qmp-cmds.c @@ -72,6 +72,7 @@ MachineInfoList *qmp_query_machines(bool has_compat_props, bool compat_props, for (el = machines; el; el = el->next) { MachineClass *mc = el->data; + const char *default_cpu_type = machine_class_default_cpu_type(mc); MachineInfo *info; info = g_malloc0(sizeof(*info)); @@ -90,8 +91,8 @@ MachineInfoList *qmp_query_machines(bool has_compat_props, bool compat_props, info->numa_mem_supported = mc->numa_mem_supported; info->deprecated = !!mc->deprecation_reason; info->acpi = !!object_class_property_find(OBJECT_CLASS(mc), "acpi"); - if (mc->default_cpu_type) { - info->default_cpu_type = g_strdup(mc->default_cpu_type); + if (default_cpu_type) { + info->default_cpu_type = g_strdup(default_cpu_type); } if (mc->default_ram_id) { info->default_ram_id = g_strdup(mc->default_ram_id); diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index f81cb680fc..21a76e904a 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -7082,7 +7082,7 @@ ObjectClass *ppc_cpu_class_by_name(const char *name) if (strcmp(name, "max") == 0) { MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); if (mc) { - return object_class_by_name(mc->default_cpu_type); + return object_class_by_name(machine_class_default_cpu_type(mc)); } } #endif From dbbb444d72f7c7b251e9b88185d9b8fc2a468efa Mon Sep 17 00:00:00 2001 From: 
Peter Maydell Date: Mon, 17 Mar 2025 14:28:11 +0000 Subject: [PATCH 006/136] hw/core/cpu: gdb_arch_name string should not be freed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The documentation for the CPUClass::gdb_arch_name method claims that the returned string should be freed with g_free(). This is not correct: in commit a650683871ba728 we changed this method to instead return a simple constant string, but forgot to update the documentation. Make the documentation match the new semantics. Fixes: a650683871ba728 ("hw/core/cpu: Return static value with gdb_arch_name()") Signed-off-by: Peter Maydell Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Message-ID: <20250317142819.900029-2-peter.maydell@linaro.org> Signed-off-by: Philippe Mathieu-Daudé (cherry picked from commit 56a9f0d4c4a483ce217e5290db69cb1788586787) Signed-off-by: Michael Tokarev --- include/hw/core/cpu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index abd8764e83..e136b067cd 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -135,7 +135,8 @@ struct SysemuCPUOps; * @gdb_stop_before_watchpoint: Indicates whether GDB expects the CPU to stop * before the insn which triggers a watchpoint rather than after it. * @gdb_arch_name: Optional callback that returns the architecture name known - * to GDB. The caller must free the returned string with g_free. + * to GDB. The returned value is expected to be a simple constant string: + * the caller will not g_free() it. * @disas_set_info: Setup architecture specific components of disassembly info * @adjust_watchpoint_address: Perform a target-specific adjustment to an * address before attempting to match it against watchpoints. 
From e60bbdad934c619d1d325b68f51497f740fe1888 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sat, 12 Apr 2025 21:40:03 +0200 Subject: [PATCH 007/136] target/mips: Fix MIPS16e translation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a wrong conversion to gen_op_addr_addi(). The framesize should be added like it was done before. This bug broke booting OpenWrt MIPS32 BE malta Linux system images generated by OpenWrt. Cc: qemu-stable@nongnu.org Fixes: d0b24b7f50e1 ("target/mips: Use gen_op_addr_addi() when possible") Signed-off-by: Hauke Mehrtens Reviewed-by: Philippe Mathieu-Daudé Message-ID: <20250412194003.181411-1-hauke@hauke-m.de> Signed-off-by: Philippe Mathieu-Daudé (cherry picked from commit d4a785ba30ce6d8acf0206f049fb4a7494e0898a) Signed-off-by: Michael Tokarev --- target/mips/tcg/mips16e_translate.c.inc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/mips/tcg/mips16e_translate.c.inc b/target/mips/tcg/mips16e_translate.c.inc index a9af8f1e74..97da3456ea 100644 --- a/target/mips/tcg/mips16e_translate.c.inc +++ b/target/mips/tcg/mips16e_translate.c.inc @@ -306,7 +306,7 @@ static void gen_mips16_restore(DisasContext *ctx, int astatic; TCGv t0 = tcg_temp_new(); - gen_op_addr_addi(ctx, t0, cpu_gpr[29], -framesize); + gen_op_addr_addi(ctx, t0, cpu_gpr[29], framesize); if (do_ra) { decr_and_load(ctx, 31, t0); @@ -386,7 +386,7 @@ static void gen_mips16_restore(DisasContext *ctx, } } - gen_op_addr_addi(ctx, cpu_gpr[29], cpu_gpr[29], -framesize); + gen_op_addr_addi(ctx, cpu_gpr[29], cpu_gpr[29], framesize); } #if defined(TARGET_MIPS64) From 7bff88a745e6d1cecbdbabf6691d412394171037 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Thu, 24 Apr 2025 13:50:11 +0900 Subject: [PATCH 008/136] meson: Use has_header_symbol() to check getcpu() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The use of gnu_source_prefix in the detection of getcpu() was 
ineffective because the header file that declares getcpu() when _GNU_SOURCE is defined was not included. Pass sched.h to has_header_symbol() so that the existence of the declaration will be properly checked. Cc: qemu-stable@nongnu.org Signed-off-by: Akihiko Odaki Reviewed-by: Philippe Mathieu-Daudé Tested-by: Philippe Mathieu-Daudé Message-ID: <20250424-buildsys-v1-1-97655e3b25d7@daynix.com> Signed-off-by: Philippe Mathieu-Daudé (cherry picked from commit 563cd698dffb977eea0ccfef3b95f6f9786766f3) Signed-off-by: Michael Tokarev --- meson.build | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 41f68d3806..c0ecb6feab 100644 --- a/meson.build +++ b/meson.build @@ -2635,7 +2635,6 @@ config_host_data.set('CONFIG_CLOCK_ADJTIME', cc.has_function('clock_adjtime')) config_host_data.set('CONFIG_DUP3', cc.has_function('dup3')) config_host_data.set('CONFIG_FALLOCATE', cc.has_function('fallocate')) config_host_data.set('CONFIG_POSIX_FALLOCATE', cc.has_function('posix_fallocate')) -config_host_data.set('CONFIG_GETCPU', cc.has_function('getcpu', prefix: gnu_source_prefix)) config_host_data.set('CONFIG_SCHED_GETCPU', cc.has_function('sched_getcpu', prefix: '#include ')) # Note that we need to specify prefix: here to avoid incorrectly # thinking that Windows has posix_memalign() @@ -2713,6 +2712,8 @@ config_host_data.set('CONFIG_FALLOCATE_ZERO_RANGE', config_host_data.set('CONFIG_FIEMAP', cc.has_header('linux/fiemap.h') and cc.has_header_symbol('linux/fs.h', 'FS_IOC_FIEMAP')) +config_host_data.set('CONFIG_GETCPU', + cc.has_header_symbol('sched.h', 'getcpu', prefix: gnu_source_prefix)) config_host_data.set('CONFIG_GETRANDOM', cc.has_function('getrandom') and cc.has_header_symbol('sys/random.h', 'GRND_NONBLOCK')) From 1604055e4e8ccf37ad0a27a4fcf6985201ff00ae Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Thu, 24 Apr 2025 13:50:12 +0900 Subject: [PATCH 009/136] meson: Remove CONFIG_STATX and CONFIG_STATX_MNT_ID MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CONFIG_STATX and CONFIG_STATX_MNT_ID are not used since commit e0dc2631ec4 ("virtiofsd: Remove source"). Cc: qemu-stable@nongnu.org Signed-off-by: Akihiko Odaki Reviewed-by: Philippe Mathieu-Daudé Tested-by: Philippe Mathieu-Daudé Message-ID: <20250424-buildsys-v1-2-97655e3b25d7@daynix.com> Signed-off-by: Philippe Mathieu-Daudé (cherry picked from commit 6804b89fb531f5dd49c1e038214c89272383e220) Signed-off-by: Michael Tokarev --- meson.build | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/meson.build b/meson.build index c0ecb6feab..1a02cd8990 100644 --- a/meson.build +++ b/meson.build @@ -2191,14 +2191,6 @@ gnu_source_prefix = ''' #endif ''' -# Check whether the glibc provides STATX_BASIC_STATS - -has_statx = cc.has_header_symbol('sys/stat.h', 'STATX_BASIC_STATS', prefix: gnu_source_prefix) - -# Check whether statx() provides mount ID information - -has_statx_mnt_id = cc.has_header_symbol('sys/stat.h', 'STATX_MNT_ID', prefix: gnu_source_prefix) - have_vhost_user_blk_server = get_option('vhost_user_blk_server') \ .require(host_os == 'linux', error_message: 'vhost_user_blk_server requires linux') \ @@ -2560,8 +2552,6 @@ config_host_data.set('CONFIG_CRYPTO_SM3', crypto_sm3.found()) config_host_data.set('CONFIG_HOGWEED', hogweed.found()) config_host_data.set('CONFIG_QEMU_PRIVATE_XTS', xts == 'private') config_host_data.set('CONFIG_MALLOC_TRIM', has_malloc_trim) -config_host_data.set('CONFIG_STATX', has_statx) -config_host_data.set('CONFIG_STATX_MNT_ID', has_statx_mnt_id) config_host_data.set('CONFIG_ZSTD', zstd.found()) config_host_data.set('CONFIG_QPL', qpl.found()) config_host_data.set('CONFIG_UADK', uadk.found()) From e1ccfea380ceafed77269735eee4b3b862dc8b6f Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Thu, 24 Apr 2025 13:50:13 +0900 Subject: [PATCH 010/136] meson: Share common C source prefixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit gnu_source_prefix defines _GNU_SOURCE for compiler object functions. The definition is universally available in the code base. docs/devel/style.rst also says that the "qemu/osdep.h" header is always included, so the files it includes are also universally available in the code base. Rename gnu_source_prefix to osdep_prefix, and add #include directives that are referred to by the users of gnu_source_prefix and contained in qemu/osdep.h to safely de-duplicate #include directives. Cc: qemu-stable@nongnu.org Signed-off-by: Akihiko Odaki Reviewed-by: Philippe Mathieu-Daudé Tested-by: Philippe Mathieu-Daudé Message-ID: <20250424-buildsys-v1-3-97655e3b25d7@daynix.com> Signed-off-by: Philippe Mathieu-Daudé (cherry picked from commit 797150d69d2edba8b1bd4a7d8c7ba2df1219c503) Signed-off-by: Michael Tokarev --- meson.build | 68 +++++++++++++++++++++++++---------------------------- 1 file changed, 32 insertions(+), 36 deletions(-) diff --git a/meson.build b/meson.build index 1a02cd8990..0a35fc3fa9 100644 --- a/meson.build +++ b/meson.build @@ -2185,10 +2185,21 @@ if not has_malloc_trim and get_option('malloc_trim').enabled() endif endif -gnu_source_prefix = ''' +osdep_prefix = ''' #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif + + #include + #include + + #include + /* Put unistd.h before time.h as that triggers localtime_r/gmtime_r + * function availability on recentish Mingw-w64 platforms. 
*/ + #include + #include + #include + #include ''' have_vhost_user_blk_server = get_option('vhost_user_blk_server') \ @@ -2703,7 +2714,7 @@ config_host_data.set('CONFIG_FIEMAP', cc.has_header('linux/fiemap.h') and cc.has_header_symbol('linux/fs.h', 'FS_IOC_FIEMAP')) config_host_data.set('CONFIG_GETCPU', - cc.has_header_symbol('sched.h', 'getcpu', prefix: gnu_source_prefix)) + cc.has_header_symbol('sched.h', 'getcpu', prefix: osdep_prefix)) config_host_data.set('CONFIG_GETRANDOM', cc.has_function('getrandom') and cc.has_header_symbol('sys/random.h', 'GRND_NONBLOCK')) @@ -2748,8 +2759,7 @@ config_host_data.set('HAVE_UTMPX', config_host_data.set('CONFIG_EVENTFD', cc.links(''' #include int main(void) { return eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); }''')) -config_host_data.set('CONFIG_FDATASYNC', cc.links(gnu_source_prefix + ''' - #include +config_host_data.set('CONFIG_FDATASYNC', cc.links(osdep_prefix + ''' int main(void) { #if defined(_POSIX_SYNCHRONIZED_IO) && _POSIX_SYNCHRONIZED_IO > 0 return fdatasync(0); @@ -2758,10 +2768,8 @@ config_host_data.set('CONFIG_FDATASYNC', cc.links(gnu_source_prefix + ''' #endif }''')) -has_madvise = cc.links(gnu_source_prefix + ''' - #include +has_madvise = cc.links(osdep_prefix + ''' #include - #include int main(void) { return madvise(NULL, 0, MADV_DONTNEED); }''') missing_madvise_proto = false if has_madvise @@ -2771,21 +2779,18 @@ if has_madvise # missing-prototype case, we try again with a definitely-bogus prototype. # This will only compile if the system headers don't provide the prototype; # otherwise the conflicting prototypes will cause a compiler error. 
- missing_madvise_proto = cc.links(gnu_source_prefix + ''' - #include + missing_madvise_proto = cc.links(osdep_prefix + '''> #include - #include extern int madvise(int); int main(void) { return madvise(0); }''') endif config_host_data.set('CONFIG_MADVISE', has_madvise) config_host_data.set('HAVE_MADVISE_WITHOUT_PROTOTYPE', missing_madvise_proto) -config_host_data.set('CONFIG_MEMFD', cc.links(gnu_source_prefix + ''' +config_host_data.set('CONFIG_MEMFD', cc.links(osdep_prefix + ''' #include int main(void) { return memfd_create("foo", MFD_ALLOW_SEALING); }''')) -config_host_data.set('CONFIG_OPEN_BY_HANDLE', cc.links(gnu_source_prefix + ''' - #include +config_host_data.set('CONFIG_OPEN_BY_HANDLE', cc.links(osdep_prefix + ''' #if !defined(AT_EMPTY_PATH) # error missing definition #else @@ -2796,13 +2801,12 @@ config_host_data.set('CONFIG_OPEN_BY_HANDLE', cc.links(gnu_source_prefix + ''' # i.e. errno is set and -1 is returned. That's not really how POSIX defines the # function. On the flip side, it has madvise() which is preferred anyways. 
if host_os != 'darwin' - config_host_data.set('CONFIG_POSIX_MADVISE', cc.links(gnu_source_prefix + ''' + config_host_data.set('CONFIG_POSIX_MADVISE', cc.links(osdep_prefix + ''' #include - #include int main(void) { return posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); }''')) endif -config_host_data.set('CONFIG_PTHREAD_SETNAME_NP_W_TID', cc.links(gnu_source_prefix + ''' +config_host_data.set('CONFIG_PTHREAD_SETNAME_NP_W_TID', cc.links(osdep_prefix + ''' #include static void *f(void *p) { return NULL; } @@ -2813,7 +2817,7 @@ config_host_data.set('CONFIG_PTHREAD_SETNAME_NP_W_TID', cc.links(gnu_source_pref pthread_setname_np(thread, "QEMU"); return 0; }''', dependencies: threads)) -config_host_data.set('CONFIG_PTHREAD_SETNAME_NP_WO_TID', cc.links(gnu_source_prefix + ''' +config_host_data.set('CONFIG_PTHREAD_SETNAME_NP_WO_TID', cc.links(osdep_prefix + ''' #include static void *f(void *p) { pthread_setname_np("QEMU"); return NULL; } @@ -2823,7 +2827,7 @@ config_host_data.set('CONFIG_PTHREAD_SETNAME_NP_WO_TID', cc.links(gnu_source_pre pthread_create(&thread, 0, f, 0); return 0; }''', dependencies: threads)) -config_host_data.set('CONFIG_PTHREAD_SET_NAME_NP', cc.links(gnu_source_prefix + ''' +config_host_data.set('CONFIG_PTHREAD_SET_NAME_NP', cc.links(osdep_prefix + ''' #include #include @@ -2835,9 +2839,8 @@ config_host_data.set('CONFIG_PTHREAD_SET_NAME_NP', cc.links(gnu_source_prefix + pthread_set_name_np(thread, "QEMU"); return 0; }''', dependencies: threads)) -config_host_data.set('CONFIG_PTHREAD_CONDATTR_SETCLOCK', cc.links(gnu_source_prefix + ''' +config_host_data.set('CONFIG_PTHREAD_CONDATTR_SETCLOCK', cc.links(osdep_prefix + ''' #include - #include int main(void) { @@ -2846,7 +2849,7 @@ config_host_data.set('CONFIG_PTHREAD_CONDATTR_SETCLOCK', cc.links(gnu_source_pre pthread_condattr_setclock(&attr, CLOCK_MONOTONIC); return 0; }''', dependencies: threads)) -config_host_data.set('CONFIG_PTHREAD_AFFINITY_NP', cc.links(gnu_source_prefix + ''' 
+config_host_data.set('CONFIG_PTHREAD_AFFINITY_NP', cc.links(osdep_prefix + ''' #include static void *f(void *p) { return NULL; } @@ -2863,15 +2866,10 @@ config_host_data.set('CONFIG_PTHREAD_AFFINITY_NP', cc.links(gnu_source_prefix + CPU_FREE(cpuset); return 0; }''', dependencies: threads)) -config_host_data.set('CONFIG_SIGNALFD', cc.links(gnu_source_prefix + ''' +config_host_data.set('CONFIG_SIGNALFD', cc.links(osdep_prefix + ''' #include - #include int main(void) { return signalfd(-1, NULL, SFD_CLOEXEC); }''')) -config_host_data.set('CONFIG_SPLICE', cc.links(gnu_source_prefix + ''' - #include - #include - #include - +config_host_data.set('CONFIG_SPLICE', cc.links(osdep_prefix + ''' int main(void) { int len, fd = 0; @@ -2880,13 +2878,13 @@ config_host_data.set('CONFIG_SPLICE', cc.links(gnu_source_prefix + ''' return 0; }''')) -config_host_data.set('HAVE_MLOCKALL', cc.links(gnu_source_prefix + ''' +config_host_data.set('HAVE_MLOCKALL', cc.links(osdep_prefix + ''' #include int main(void) { return mlockall(MCL_FUTURE); }''')) -config_host_data.set('HAVE_MLOCK_ONFAULT', cc.links(gnu_source_prefix + ''' +config_host_data.set('HAVE_MLOCK_ONFAULT', cc.links(osdep_prefix + ''' #include int main(void) { return mlockall(MCL_FUTURE | MCL_ONFAULT); @@ -2895,7 +2893,7 @@ config_host_data.set('HAVE_MLOCK_ONFAULT', cc.links(gnu_source_prefix + ''' have_l2tpv3 = false if get_option('l2tpv3').allowed() and have_system have_l2tpv3 = cc.has_type('struct mmsghdr', - prefix: gnu_source_prefix + ''' + prefix: osdep_prefix + ''' #include #include ''') endif @@ -3011,13 +3009,13 @@ if has_int128_type endif endif -config_host_data.set('CONFIG_GETAUXVAL', cc.links(gnu_source_prefix + ''' +config_host_data.set('CONFIG_GETAUXVAL', cc.links(osdep_prefix + ''' #include int main(void) { return getauxval(AT_HWCAP) == 0; }''')) -config_host_data.set('CONFIG_ELF_AUX_INFO', cc.links(gnu_source_prefix + ''' +config_host_data.set('CONFIG_ELF_AUX_INFO', cc.links(osdep_prefix + ''' #include int 
main(void) { unsigned long hwcap = 0; @@ -3130,9 +3128,7 @@ config_host_data.set('CONFIG_MEMBARRIER', get_option('membarrier') \ .allowed()) have_afalg = get_option('crypto_afalg') \ - .require(cc.compiles(gnu_source_prefix + ''' - #include - #include + .require(cc.compiles(osdep_prefix + ''' #include #include int main(void) { From 8a9a7193143a75c54eb6c184e583c5c2b8935f18 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Thu, 24 Apr 2025 13:50:14 +0900 Subject: [PATCH 011/136] meson: Use osdep_prefix for strchrnul() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit macOS SDK may have the symbol of strchrnul(), but it is actually available only on macOS 15.4 or later and that fact is codified in string.h. Include the header file using osdep_prefix to check if the function is available on the deployment target. Cc: qemu-stable@nongnu.org Signed-off-by: Akihiko Odaki Reviewed-by: Philippe Mathieu-Daudé Tested-by: Philippe Mathieu-Daudé Message-ID: <20250424-buildsys-v1-4-97655e3b25d7@daynix.com> Signed-off-by: Philippe Mathieu-Daudé (cherry picked from commit a5b30be534538dc6e44a68ce9734e45dd08f52ec) Signed-off-by: Michael Tokarev --- meson.build | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 0a35fc3fa9..8ec796d835 100644 --- a/meson.build +++ b/meson.build @@ -2193,6 +2193,7 @@ osdep_prefix = ''' #include #include + #include #include /* Put unistd.h before time.h as that triggers localtime_r/gmtime_r * function availability on recentish Mingw-w64 platforms. 
*/ @@ -2657,7 +2658,7 @@ config_host_data.set('HAVE_GETIFADDRS', cc.has_function('getifaddrs')) config_host_data.set('HAVE_GLIB_WITH_SLICE_ALLOCATOR', glib_has_gslice) config_host_data.set('HAVE_GLIB_WITH_ALIGNED_ALLOC', glib_has_aligned_alloc) config_host_data.set('HAVE_OPENPTY', cc.has_function('openpty', dependencies: util)) -config_host_data.set('HAVE_STRCHRNUL', cc.has_function('strchrnul')) +config_host_data.set('HAVE_STRCHRNUL', cc.has_function('strchrnul', prefix: osdep_prefix)) config_host_data.set('HAVE_SYSTEM_FUNCTION', cc.has_function('system', prefix: '#include ')) if rbd.found() config_host_data.set('HAVE_RBD_NAMESPACE_EXISTS', From 86b846f9027ff5bcdff06a6a64d3d99ae3469150 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 30 Apr 2025 14:35:47 -0700 Subject: [PATCH 012/136] accel/tcg: Don't use TARGET_LONG_BITS in decode_sleb128 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we changed decode_sleb128 from target_long to int64_t, we failed to adjust the shift limit. 
Cc: qemu-stable@nongnu.org Fixes: c9ad8d27caa ("tcg: Widen gen_insn_data to uint64_t") Reviewed-by: Pierrick Bouvier Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson (cherry picked from commit 9401f91b9b0c46886388735b3f2033a9c254895a) Signed-off-by: Michael Tokarev --- accel/tcg/translate-all.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 82bc16bd53..a497c54b80 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -106,7 +106,7 @@ static int64_t decode_sleb128(const uint8_t **pp) val |= (int64_t)(byte & 0x7f) << shift; shift += 7; } while (byte & 0x80); - if (shift < TARGET_LONG_BITS && (byte & 0x40)) { + if (shift < 64 && (byte & 0x40)) { val |= -(int64_t)1 << shift; } From 07f034c7f4f8a57068fbedce4891a60e5b4469f5 Mon Sep 17 00:00:00 2001 From: Tim Lee Date: Mon, 28 Apr 2025 10:29:34 +0800 Subject: [PATCH 013/136] hw/arm/npcm8xx_boards: Correct valid_cpu_types setting of NPCM8XX SoC NPCM8XX SoC is the successor of the NPCM7XX. It features quad-core Cortex-A35 (Armv8, 64-bit) CPUs and some additional peripherals. Correct the `valid_cpu_types` setting to match the NPCM8XX SoC. 
Cc: qemu-stable@nongnu.org Fixes: 7e70eb3cad7c83 ("hw/arm: Add NPCM845 Evaluation board") Signed-off-by: Tim Lee Message-id: 20250428022934.3081139-1-timlee660101@gmail.com Reviewed-by: Peter Maydell Reviewed-by: Tyrone Ting Signed-off-by: Peter Maydell (cherry picked from commit 97cdd1b0a7a010702a1d118b74c3af3bb2edb35c) Signed-off-by: Michael Tokarev --- hw/arm/npcm8xx_boards.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/arm/npcm8xx_boards.c b/hw/arm/npcm8xx_boards.c index 3fb8478e72..6d3e59f6b9 100644 --- a/hw/arm/npcm8xx_boards.c +++ b/hw/arm/npcm8xx_boards.c @@ -213,7 +213,7 @@ static void npcm8xx_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); static const char * const valid_cpu_types[] = { - ARM_CPU_TYPE_NAME("cortex-a9"), + ARM_CPU_TYPE_NAME("cortex-a35"), NULL }; From ac32612b93343a9eb4af2dbac8ecaa9e596dcc27 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 1 May 2025 13:55:44 +0100 Subject: [PATCH 014/136] target/arm: Don't assert() for ISB/SB inside IT block If the guest code has an ISB or SB insn inside an IT block, we generate incorrect code which trips a TCG assertion: qemu-system-arm: ../tcg/tcg-op.c:3343: void tcg_gen_goto_tb(unsigned int): Assertion `(tcg_ctx->goto_tb_issue_mask & (1 << idx)) == 0' failed. This is because we call gen_goto_tb(dc, 1, ...) twice: brcond_i32 ZF,$0x0,ne,$L1 add_i32 pc,pc,$0x4 goto_tb $0x1 exit_tb $0x73d948001b81 set_label $L1 add_i32 pc,pc,$0x4 goto_tb $0x1 exit_tb $0x73d948001b81 Both calls are in arm_tr_tb_stop(), one for the DISAS_NEXT/DISAS_TOO_MANY handling, and one for the dc->condjump condition-failed codepath. The DISAS_NEXT handling doesn't have this problem because arm_post_translate_insn() does the handling of "emit the label for the condition-failed conditional execution" and so arm_tr_tb_stop() doesn't have dc->condjump set. But for DISAS_TOO_MANY we don't do that. 
Fix the bug by making arm_post_translate_insn() handle the DISAS_TOO_MANY case. This only affects the SB and ISB insns when used in Thumb mode inside an IT block: only these insns specifically set is_jmp to TOO_MANY, and their A32 encodings are unconditional. For the major TOO_MANY case (breaking the TB because it would cross a page boundary) we do that check and set is_jmp to TOO_MANY only after the call to arm_post_translate_insn(); so arm_post_translate_insn() sees is_jmp == DISAS_NEXT, and we emit the correct code for that situation. With this fix we generate the somewhat more sensible set of TCG ops: brcond_i32 ZF,$0x0,ne,$L1 set_label $L1 add_i32 pc,pc,$0x4 goto_tb $0x1 exit_tb $0x7c5434001b81 (NB: the TCG optimizer doesn't optimize out the jump-to-next, but we can't really avoid emitting it because we don't know at the point we're emitting the handling for the condexec check whether this insn is going to happen to be a nop for us or not.) Cc: qemu-stable@nongnu.org Fixes: https://gitlab.com/qemu-project/qemu/-/issues/2942 Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20250501125544.727038-1-peter.maydell@linaro.org (cherry picked from commit 8ed7c0b6488a7f20318d6ba414f1cbcd0ed92afe) Signed-off-by: Michael Tokarev --- target/arm/tcg/translate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c index d280018138..7e749fc15b 100644 --- a/target/arm/tcg/translate.c +++ b/target/arm/tcg/translate.c @@ -7770,7 +7770,8 @@ static bool arm_check_ss_active(DisasContext *dc) static void arm_post_translate_insn(DisasContext *dc) { - if (dc->condjmp && dc->base.is_jmp == DISAS_NEXT) { + if (dc->condjmp && + (dc->base.is_jmp == DISAS_NEXT || dc->base.is_jmp == DISAS_TOO_MANY)) { if (dc->pc_save != dc->condlabel.pc_save) { gen_update_pc(dc, dc->condlabel.pc_save - dc->pc_save); } From be7b08eb9f5b352f8e805fc8eca07d186e3fff15 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: 
Thu, 1 May 2025 10:31:26 +0100 Subject: [PATCH 015/136] docs: Don't define duplicate label in qemu-block-drivers.rst.inc Sphinx requires that labels within documents are unique across the whole manual. This is because the "create a hyperlink" directive specifies only the name of the label, not a filename+label. Some Sphinx versions will warn about duplicate labels, but even if there is no warning there is still an ambiguity and no guarantee that the hyperlink will be created to the right target. For QEMU this is awkward, because we have various .rst.inc fragments which we include into multiple .rst files. If you define a label in the .rst.inc file then it will be a duplicate label. We have mostly worked around this by not putting labels into those .rst.inc files, or by adding "insert a label" functionality into the hxtool extension (see commit 1eeb432a953b0 "doc/sphinx/hxtool.py: add optional label argument to SRST directive"). Unfortunately in commit 7f6314427e78 ("docs/devel: add a codebase section") we accidentally added a duplicate label, because not all Sphinx versions warn about the mistake. In this case the link was only from the developer docs codebase summary, so as the simplest fix for the stable branch, we drop the link entirely. 
Cc: qemu-stable@nongnu.org Fixes: 7f6314427e78 ("docs/devel: add a codebase section") Reported-by: Dario Faggioli Signed-off-by: Peter Maydell Acked-by: Eric Blake Reviewed-by: Pierrick Bouvier Message-id: 20250501093126.716667-1-peter.maydell@linaro.org (cherry picked from commit 82707dd4f07613eed8d639956a43bddffca5cd5c) Signed-off-by: Michael Tokarev --- docs/devel/codebase.rst | 2 +- docs/system/qemu-block-drivers.rst.inc | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/devel/codebase.rst b/docs/devel/codebase.rst index ef98578296..085da10a22 100644 --- a/docs/devel/codebase.rst +++ b/docs/devel/codebase.rst @@ -116,7 +116,7 @@ yet, so sometimes the source code is all you have. * `monitor `_: `Monitor ` implementation (HMP & QMP). * `nbd `_: - QEMU `NBD (Network Block Device) ` server. + QEMU NBD (Network Block Device) server. * `net `_: Network (host) support. * `pc-bios `_: diff --git a/docs/system/qemu-block-drivers.rst.inc b/docs/system/qemu-block-drivers.rst.inc index cfe1acb78a..384e95ba76 100644 --- a/docs/system/qemu-block-drivers.rst.inc +++ b/docs/system/qemu-block-drivers.rst.inc @@ -500,8 +500,6 @@ What you should *never* do: - expect it to work when loadvm'ing - write to the FAT directory on the host system while accessing it with the guest system -.. _nbd: - NBD access ~~~~~~~~~~ From 01a9f1a6c5ea7275f35fbdfe60401ff4d21ec609 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Thu, 1 May 2025 20:34:45 +0200 Subject: [PATCH 016/136] hw/gpio/imx_gpio: Fix interpretation of GDIR polarity According to the i.MX 8M Plus reference manual, a GPIO pin is configured as an output when the corresponding bit in the GDIR register is set. The function imx_gpio_set_int_line() is intended to be a no-op if the pin is configured as an output, returning early in such cases. However, it inverts the condition. Fix this by returning early when the bit is set.
cc: qemu-stable@nongnu.org Fixes: f44272809779 ("i.MX: Add GPIO device") Signed-off-by: Bernhard Beschow Message-id: 20250501183445.2389-4-shentey@gmail.com Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell (cherry picked from commit eba837a31b9579e30cc6d7ecb4b5c2662a6ffaba) Signed-off-by: Michael Tokarev --- hw/gpio/imx_gpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/gpio/imx_gpio.c b/hw/gpio/imx_gpio.c index 549a281ed7..8e0ee3048a 100644 --- a/hw/gpio/imx_gpio.c +++ b/hw/gpio/imx_gpio.c @@ -72,7 +72,7 @@ static void imx_gpio_update_int(IMXGPIOState *s) static void imx_gpio_set_int_line(IMXGPIOState *s, int line, IMXGPIOLevel level) { /* if this signal isn't configured as an input signal, nothing to do */ - if (!extract32(s->gdir, line, 1)) { + if (extract32(s->gdir, line, 1)) { return; } From f60033d3794427dfef30b2b1cd3485d3df459737 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 8 May 2025 11:32:11 +0200 Subject: [PATCH 017/136] target/i386: do not trigger IRQ shadow for LSS Because LSS need not trigger an IRQ shadow, gen_movl_seg can't just use the destination register to decide whether to inhibit IRQs. Add an argument. Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini (cherry picked from commit e54ef98c8a80d16158bab4341d9a898701270528) (back-ported to 10.0) Signed-off-by: Michael Tokarev --- target/i386/tcg/emit.c.inc | 4 ++-- target/i386/tcg/translate.c | 33 ++++++++++++++++++++------------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 0fa1664a24..b4dcb46e81 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -352,7 +352,7 @@ static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv break; case X86_OP_SEG: /* Note that gen_movl_seg takes care of interrupt shadow and TF. 
*/ - gen_movl_seg(s, op->n, s->T0); + gen_movl_seg(s, op->n, v, op->n == R_SS); break; case X86_OP_INT: if (op->has_ea) { @@ -2372,7 +2372,7 @@ static void gen_lxx_seg(DisasContext *s, X86DecodedInsn *decode, int seg) gen_op_ld_v(s, MO_16, s->T1, s->A0); /* load the segment here to handle exceptions properly */ - gen_movl_seg(s, seg, s->T1); + gen_movl_seg(s, seg, s->T1, false); } static void gen_LDS(DisasContext *s, X86DecodedInsn *decode) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index a8935f487a..390018fdec 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -1992,25 +1992,32 @@ static void gen_op_movl_seg_real(DisasContext *s, X86Seg seg_reg, TCGv seg) /* move SRC to seg_reg and compute if the CPU state may change. Never call this function with seg_reg == R_CS */ -static void gen_movl_seg(DisasContext *s, X86Seg seg_reg, TCGv src) +static void gen_movl_seg(DisasContext *s, X86Seg seg_reg, TCGv src, bool inhibit_irq) { if (PE(s) && !VM86(s)) { - tcg_gen_trunc_tl_i32(s->tmp2_i32, src); - gen_helper_load_seg(tcg_env, tcg_constant_i32(seg_reg), s->tmp2_i32); - /* abort translation because the addseg value may change or - because ss32 may change. For R_SS, translation must always - stop as a special handling must be done to disable hardware - interrupts for the next instruction */ - if (seg_reg == R_SS) { - s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ; - } else if (CODE32(s) && seg_reg < R_FS) { + TCGv_i32 sel = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(sel, src); + gen_helper_load_seg(tcg_env, tcg_constant_i32(seg_reg), sel); + + /* For move to DS/ES/SS, the addseg or ss32 flags may change. 
*/ + if (CODE32(s) && seg_reg < R_FS) { s->base.is_jmp = DISAS_EOB_NEXT; } } else { gen_op_movl_seg_real(s, seg_reg, src); - if (seg_reg == R_SS) { - s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ; - } + } + + /* + * For MOV or POP to SS (but not LSS) translation must always + * stop as a special handling must be done to disable hardware + * interrupts for the next instruction. + * + * DISAS_EOB_INHIBIT_IRQ is a superset of DISAS_EOB_NEXT which + * might have been set above. + */ + if (inhibit_irq) { + s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ; } } From 827be9d37aa83ffc3a7489c73d4f4d11c4dba913 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 15 Jul 2024 10:35:06 +0200 Subject: [PATCH 018/136] target/i386: do not block singlestep for STI STI will trigger a singlestep exception even if it has inhibit-IRQ behavior. Do not suppress single-step for all IRQ-inhibiting instructions, instead special case MOV SS and POP SS. Cc: qemu-stable@nongnu.org Fixes: f0f0136abba ("target/i386: no single-step exception after MOV or POP SS", 2024-05-25) Signed-off-by: Paolo Bonzini (cherry picked from commit 1e94ddc6854431064c94a7d8f2f2886def285829) Signed-off-by: Michael Tokarev --- target/i386/tcg/translate.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 390018fdec..50cf56175f 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2013,11 +2013,15 @@ static void gen_movl_seg(DisasContext *s, X86Seg seg_reg, TCGv src, bool inhibit * stop as a special handling must be done to disable hardware * interrupts for the next instruction. * + * This is the last instruction, so it's okay to overwrite + * HF_TF_MASK; the next TB will start with the flag set. + * * DISAS_EOB_INHIBIT_IRQ is a superset of DISAS_EOB_NEXT which * might have been set above. 
*/ if (inhibit_irq) { s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ; + s->flags &= ~HF_TF_MASK; } } @@ -2265,7 +2269,7 @@ gen_eob(DisasContext *s, int mode) if (mode == DISAS_EOB_RECHECK_TF) { gen_helper_rechecking_single_step(tcg_env); tcg_gen_exit_tb(NULL, 0); - } else if ((s->flags & HF_TF_MASK) && mode != DISAS_EOB_INHIBIT_IRQ) { + } else if (s->flags & HF_TF_MASK) { gen_helper_single_step(tcg_env); } else if (mode == DISAS_JUMP && /* give irqs a chance to happen */ From a03d7d6e32236d5c1b331e07b11cd7991f0d3a74 Mon Sep 17 00:00:00 2001 From: Aleksandr Partanen Date: Thu, 10 Apr 2025 17:46:04 +0300 Subject: [PATCH 019/136] xen: mapcache: Fix finding matching entry If we have a request without lock and hit an unlocked or invalid entry during the search, we remap it immediately, even if there is a matching entry among the next entries in the bucket. This leads to duplication of mappings of the same size, and to the possibility of selecting the wrong element during invalidation and underflowing its entry->lock counter. Signed-off-by: Aleksandr Partanen Reviewed-by: Stefano Stabellini Reviewed-by: Edgar E. Iglesias Signed-off-by: Edgar E.
Iglesias (cherry picked from commit a4b20f737cda06bb8706a83e27f7fa89863ae689) Signed-off-by: Michael Tokarev --- hw/xen/xen-mapcache.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c index 698b5c53ed..2c8f861fdb 100644 --- a/hw/xen/xen-mapcache.c +++ b/hw/xen/xen-mapcache.c @@ -376,12 +376,12 @@ static uint8_t *xen_map_cache_unlocked(MapCache *mc, entry = &mc->entry[address_index % mc->nr_buckets]; - while (entry && (lock || entry->lock) && entry->vaddr_base && - (entry->paddr_index != address_index || entry->size != cache_size || + while (entry && (!entry->vaddr_base || + entry->paddr_index != address_index || entry->size != cache_size || !test_bits(address_offset >> XC_PAGE_SHIFT, test_bit_size >> XC_PAGE_SHIFT, entry->valid_mapping))) { - if (!free_entry && !entry->lock) { + if (!free_entry && (!entry->lock || !entry->vaddr_base)) { free_entry = entry; free_pentry = pentry; } From bfa3f55f0fe9a4185577447faedcb5b4b302f947 Mon Sep 17 00:00:00 2001 From: "Edgar E. Iglesias" Date: Fri, 25 Apr 2025 15:16:01 +0200 Subject: [PATCH 020/136] xen: mapcache: Split mapcache_grants by ro and rw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Today, we don't track write-ability in the cache. If a user requests a readable mapping followed by a writeable mapping on the same page, the second lookup will incorrectly hit the readable entry. Split mapcache_grants by ro and rw access. Grants will now have separate ways in the cache depending on writeability. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Stefano Stabellini Signed-off-by: Edgar E.
Iglesias (cherry picked from commit 88fb705600a3b612c571efc9f1a6aed923a18dcc) Signed-off-by: Michael Tokarev --- hw/xen/xen-mapcache.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/hw/xen/xen-mapcache.c b/hw/xen/xen-mapcache.c index 2c8f861fdb..e31d379702 100644 --- a/hw/xen/xen-mapcache.c +++ b/hw/xen/xen-mapcache.c @@ -75,7 +75,8 @@ typedef struct MapCache { } MapCache; static MapCache *mapcache; -static MapCache *mapcache_grants; +static MapCache *mapcache_grants_ro; +static MapCache *mapcache_grants_rw; static xengnttab_handle *xen_region_gnttabdev; static inline void mapcache_lock(MapCache *mc) @@ -176,9 +177,12 @@ void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque) * Grant mappings must use XC_PAGE_SIZE granularity since we can't * map anything beyond the number of pages granted to us. */ - mapcache_grants = xen_map_cache_init_single(f, opaque, - XC_PAGE_SHIFT, - max_mcache_size); + mapcache_grants_ro = xen_map_cache_init_single(f, opaque, + XC_PAGE_SHIFT, + max_mcache_size); + mapcache_grants_rw = xen_map_cache_init_single(f, opaque, + XC_PAGE_SHIFT, + max_mcache_size); setrlimit(RLIMIT_AS, &rlimit_as); } @@ -456,9 +460,13 @@ uint8_t *xen_map_cache(MemoryRegion *mr, bool is_write) { bool grant = xen_mr_is_grants(mr); - MapCache *mc = grant ? mapcache_grants : mapcache; + MapCache *mc = mapcache; uint8_t *p; + if (grant) { + mc = is_write ? mapcache_grants_rw : mapcache_grants_ro; + } + if (grant && !lock) { /* * Grants are only supported via address_space_map(). 
Anything @@ -523,7 +531,10 @@ ram_addr_t xen_ram_addr_from_mapcache(void *ptr) addr = xen_ram_addr_from_mapcache_single(mapcache, ptr); if (addr == RAM_ADDR_INVALID) { - addr = xen_ram_addr_from_mapcache_single(mapcache_grants, ptr); + addr = xen_ram_addr_from_mapcache_single(mapcache_grants_ro, ptr); + } + if (addr == RAM_ADDR_INVALID) { + addr = xen_ram_addr_from_mapcache_single(mapcache_grants_rw, ptr); } return addr; @@ -626,7 +637,8 @@ static void xen_invalidate_map_cache_entry_single(MapCache *mc, uint8_t *buffer) static void xen_invalidate_map_cache_entry_all(uint8_t *buffer) { xen_invalidate_map_cache_entry_single(mapcache, buffer); - xen_invalidate_map_cache_entry_single(mapcache_grants, buffer); + xen_invalidate_map_cache_entry_single(mapcache_grants_ro, buffer); + xen_invalidate_map_cache_entry_single(mapcache_grants_rw, buffer); } static void xen_invalidate_map_cache_entry_bh(void *opaque) From 9340920c3f5d62d72c62f6e18094815ebc9f8144 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Wed, 7 May 2025 14:40:40 +0200 Subject: [PATCH 021/136] hw/i2c/imx: Always set interrupt status bit if interrupt condition occurs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the i.MX 8M Plus reference manual, the status flag I2C_I2SR[IIF] continues to be set when an interrupt condition occurs even when I2C interrupts are disabled (I2C_I2CR[IIEN] is clear). However, the device model only sets the flag when I2C interrupts are enabled which causes U-Boot to loop forever. Fix the device model by always setting the flag and let I2C_I2CR[IIEN] guard I2C interrupts only. Also remove the comment in the code since it merely stated the obvious and would be outdated now. 
Cc: qemu-stable@nongnu.org Fixes: 20d0f9cf6a41 ("i.MX: Add I2C controller emulator") Signed-off-by: Bernhard Beschow Acked-by: Corey Minyard Message-ID: <20250507124040.425773-1-shentey@gmail.com> Signed-off-by: Philippe Mathieu-Daudé (cherry picked from commit 54e54e594bc8273d210f7ff4448c165a989cbbe8) Signed-off-by: Michael Tokarev --- hw/i2c/imx_i2c.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/hw/i2c/imx_i2c.c b/hw/i2c/imx_i2c.c index d62213b9e0..f33ca93c3e 100644 --- a/hw/i2c/imx_i2c.c +++ b/hw/i2c/imx_i2c.c @@ -79,13 +79,12 @@ static void imx_i2c_reset(DeviceState *dev) static inline void imx_i2c_raise_interrupt(IMXI2CState *s) { - /* - * raise an interrupt if the device is enabled and it is configured - * to generate some interrupts. - */ - if (imx_i2c_is_enabled(s) && imx_i2c_interrupt_is_enabled(s)) { + if (imx_i2c_is_enabled(s)) { s->i2sr |= I2SR_IIF; - qemu_irq_raise(s->irq); + + if (imx_i2c_interrupt_is_enabled(s)) { + qemu_irq_raise(s->irq); + } } } From 5081dc508d4b60c1ed33f160bc6916048e098cad Mon Sep 17 00:00:00 2001 From: Christian Schoenebeck Date: Fri, 7 Mar 2025 10:22:56 +0100 Subject: [PATCH 022/136] 9pfs: fix concurrent v9fs_reclaim_fd() calls Even though this function is serialized to be always called from main thread, v9fs_reclaim_fd() is dispatching the coroutine to a worker thread in between via its v9fs_co_*() calls, hence leading to the situation where v9fs_reclaim_fd() is effectively executed multiple times simultaneously, which renders its LRU algorithm useless and causes high latency. Fix this by adding a simple boolean variable to ensure this function is only called once at a time. No synchronization needed for this boolean variable as this function is only entered and returned on main thread.
Fixes: 7a46274529c ('hw/9pfs: Add file descriptor reclaim support') Signed-off-by: Christian Schoenebeck Reviewed-by: Greg Kurz Message-Id: <5c622067efd66dd4ee5eca740dcf263f41db20b2.1741339452.git.qemu_oss@crudebyte.com> (cherry picked from commit 61da38db70affd925226ce1e8a61d761c20d045b) Signed-off-by: Michael Tokarev --- hw/9pfs/9p.c | 10 ++++++++++ hw/9pfs/9p.h | 1 + 2 files changed, 11 insertions(+) diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 7cad2bce62..4f9c2dde9c 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -435,6 +435,12 @@ void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu) GHashTableIter iter; gpointer fid; + /* prevent multiple coroutines running this function simultaniously */ + if (s->reclaiming) { + return; + } + s->reclaiming = true; + g_hash_table_iter_init(&iter, s->fids); QSLIST_HEAD(, V9fsFidState) reclaim_list = @@ -510,6 +516,8 @@ void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu) */ put_fid(pdu, f); } + + s->reclaiming = false; } /* @@ -4324,6 +4332,8 @@ int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t, s->ctx.fst = &fse->fst; fsdev_throttle_init(s->ctx.fst); + s->reclaiming = false; + rc = 0; out: if (rc) { diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h index 5e041e1f60..259ad32ed1 100644 --- a/hw/9pfs/9p.h +++ b/hw/9pfs/9p.h @@ -362,6 +362,7 @@ struct V9fsState { uint64_t qp_ndevices; /* Amount of entries in qpd_table. */ uint16_t qp_affix_next; uint64_t qp_fullpath_next; + bool reclaiming; }; /* 9p2000.L open flags */ From a0f326228a927d65e3b2dc23eab17b88aadc1a03 Mon Sep 17 00:00:00 2001 From: Christian Schoenebeck Date: Fri, 7 Mar 2025 10:23:02 +0100 Subject: [PATCH 023/136] 9pfs: fix FD leak and reduce latency of v9fs_reclaim_fd() This patch fixes two different bugs in v9fs_reclaim_fd(): 1. Reduce latency: This function calls v9fs_co_close() and v9fs_co_closedir() in a loop. Each one of the calls adds two thread hops (between main thread and a fs driver background thread). 
Each thread hop adds latency, which sums up in function's loop to a significant duration. Reduce overall latency by open coding what v9fs_co_close() and v9fs_co_closedir() do, executing those and the loop itself altogether in only one background thread block, hence reducing the total amount of thread hops to only two. 2. Fix file descriptor leak: The existing code called v9fs_co_close() and v9fs_co_closedir() to close file descriptors. Both functions check right at the beginning if the 9p request was cancelled: if (v9fs_request_cancelled(pdu)) { return -EINTR; } So if client sent a 'Tflush' message, v9fs_co_close() / v9fs_co_closedir() returned without having closed the file descriptor and v9fs_reclaim_fd() subsequently freed the FID without its file descriptor being closed, hence leaking those file descriptors. This 2nd bug is fixed by this patch as well by open coding v9fs_co_close() and v9fs_co_closedir() inside of v9fs_reclaim_fd() and not performing the v9fs_request_cancelled(pdu) check there. 
Fixes: 7a46274529c ('hw/9pfs: Add file descriptor reclaim support') Fixes: bccacf6c792 ('hw/9pfs: Implement TFLUSH operation') Signed-off-by: Christian Schoenebeck Reviewed-by: Greg Kurz Message-Id: <5747469d3f039c53147e850b456943a1d4b5485c.1741339452.git.qemu_oss@crudebyte.com> (cherry picked from commit 89f7b4da7662ecc6840ffb0846045f03f9714bc6) Signed-off-by: Michael Tokarev --- hw/9pfs/9p.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 4f9c2dde9c..80b190ff5b 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -434,6 +434,8 @@ void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu) V9fsFidState *f; GHashTableIter iter; gpointer fid; + int err; + int nclosed = 0; /* prevent multiple coroutines running this function simultaniously */ if (s->reclaiming) { @@ -446,10 +448,10 @@ void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu) QSLIST_HEAD(, V9fsFidState) reclaim_list = QSLIST_HEAD_INITIALIZER(reclaim_list); + /* Pick FIDs to be closed, collect them on reclaim_list. */ while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &f)) { /* - * Unlink fids cannot be reclaimed. Check - * for them and skip them. Also skip fids + * Unlinked fids cannot be reclaimed, skip those, and also skip fids * currently being operated on. */ if (f->ref || f->flags & FID_NON_RECLAIMABLE) { @@ -499,17 +501,26 @@ void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu) } } /* - * Now close the fid in reclaim list. Free them if they - * are already clunked. + * Close the picked FIDs altogether on a background I/O driver thread. Do + * this all at once to keep latency (i.e. amount of thread hops between main + * thread <-> fs driver background thread) as low as possible. */ + v9fs_co_run_in_worker({ + QSLIST_FOREACH(f, &reclaim_list, reclaim_next) { + err = (f->fid_type == P9_FID_DIR) ? 
+ s->ops->closedir(&s->ctx, &f->fs_reclaim) : + s->ops->close(&s->ctx, &f->fs_reclaim); + if (!err) { + /* total_open_fd must only be mutated on main thread */ + nclosed++; + } + } + }); + total_open_fd -= nclosed; + /* Free the closed FIDs. */ while (!QSLIST_EMPTY(&reclaim_list)) { f = QSLIST_FIRST(&reclaim_list); QSLIST_REMOVE(&reclaim_list, f, V9fsFidState, reclaim_next); - if (f->fid_type == P9_FID_FILE) { - v9fs_co_close(pdu, &f->fs_reclaim); - } else if (f->fid_type == P9_FID_DIR) { - v9fs_co_closedir(pdu, &f->fs_reclaim); - } /* * Now drop the fid reference, free it * if clunked. From 8efe15927b9692bf7202c20522c9c93267232e0d Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Fri, 9 May 2025 14:49:38 -0300 Subject: [PATCH 024/136] s390x: Fix leak in machine_set_loadparm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ASAN spotted a leaking string in machine_set_loadparm(): Direct leak of 9 byte(s) in 1 object(s) allocated from: #0 0x560ffb5bb379 in malloc ../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:69:3 #1 0x7f1aca926518 in g_malloc ../glib/gmem.c:106 #2 0x7f1aca94113e in g_strdup ../glib/gstrfuncs.c:364 #3 0x560ffc8afbf9 in qobject_input_type_str ../qapi/qobject-input-visitor.c:542:12 #4 0x560ffc8a80ff in visit_type_str ../qapi/qapi-visit-core.c:349:10 #5 0x560ffbe6053a in machine_set_loadparm ../hw/s390x/s390-virtio-ccw.c:802:10 #6 0x560ffc0c5e52 in object_property_set ../qom/object.c:1450:5 #7 0x560ffc0d4175 in object_property_set_qobject ../qom/qom-qobject.c:28:10 #8 0x560ffc0c6004 in object_property_set_str ../qom/object.c:1458:15 #9 0x560ffbe2ae60 in update_machine_ipl_properties ../hw/s390x/ipl.c:569:9 #10 0x560ffbe2aa65 in s390_ipl_update_diag308 ../hw/s390x/ipl.c:594:5 #11 0x560ffbdee132 in handle_diag_308 ../target/s390x/diag.c:147:9 #12 0x560ffbebb956 in helper_diag ../target/s390x/tcg/misc_helper.c:137:9 #13 0x7f1a3c51c730 (/memfd:tcg-jit (deleted)+0x39730) Cc: qemu-stable@nongnu.org 
Signed-off-by: Fabiano Rosas Message-ID: <20250509174938.25935-1-farosas@suse.de> Fixes: 1fd396e3228 ("s390x: Register TYPE_S390_CCW_MACHINE properties as class properties") Reviewed-by: Thomas Huth Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Huth (cherry picked from commit bdf12f2a56bf3f13c52eb51f0a994bbfe40706b2) Signed-off-by: Michael Tokarev --- hw/s390x/s390-virtio-ccw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 75b32182eb..f1936caca2 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -802,6 +802,7 @@ static void machine_set_loadparm(Object *obj, Visitor *v, } s390_ipl_fmt_loadparm(ms->loadparm, val, errp); + g_free(val); } static void ccw_machine_class_init(ObjectClass *oc, void *data) From 40558266b1bcea744427118fd6f848a9e31364e2 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Mon, 21 Apr 2025 21:17:20 +0900 Subject: [PATCH 025/136] virtio: Call set_features during reset virtio-net expects set_features() will be called when the feature set used by the guest changes to update the number of virtqueues but it is not called during reset, which will clear all features, leaving the queues added for VIRTIO_NET_F_MQ or VIRTIO_NET_F_RSS. Not only these extra queues are visible to the guest, they will cause segmentation fault during migration. Call set_features() during reset to remove those queues for virtio-net as we call set_status(). It will also prevent similar bugs for virtio-net and other devices in the future. Fixes: f9d6dbf0bf6e ("virtio-net: remove virtio queues if the guest doesn't support multiqueue") Buglink: https://issues.redhat.com/browse/RHEL-73842 Cc: qemu-stable@nongnu.org Signed-off-by: Akihiko Odaki Message-Id: <20250421-reset-v2-1-e4c1ead88ea1@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin (cherry picked from commit 0caed25cd171c611781589b5402161d27d57229c) Signed-off-by: Michael Tokarev --- hw/virtio/virtio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 85110bce37..755260981e 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2316,6 +2316,8 @@ void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index) } } +static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val); + void virtio_reset(void *opaque) { VirtIODevice *vdev = opaque; @@ -2346,7 +2348,7 @@ void virtio_reset(void *opaque) vdev->start_on_kick = false; vdev->started = false; vdev->broken = false; - vdev->guest_features = 0; + virtio_set_features_nocheck(vdev, 0); vdev->queue_sel = 0; vdev->status = 0; vdev->disabled = false; From 15143814cddef24b69f3b19a9baca1b964dfe40c Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Wed, 7 May 2025 09:30:55 +0200 Subject: [PATCH 026/136] hw/nvme: fix nvme hotplugging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit cd59f50ab017 caused a regression on nvme hotplugging for devices with an implicit nvm subsystem. The nvme-subsys device was incorrectly left with being marked as non-hotpluggable. Fix this. 
Cc: qemu-stable@nongnu.org Reported-by: Stéphane Graber Tested-by: Stéphane Graber Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2950 Fixes: cd59f50ab017 ("hw/nvme: always initialize a subsystem") Reviewed-by: Keith Busch Signed-off-by: Klaus Jensen (cherry picked from commit 0b1c23a582f7bc721a9b858c289a8d165152a6a0) Signed-off-by: Michael Tokarev --- hw/nvme/subsys.c | 1 - 1 file changed, 1 deletion(-) diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c index b617ac3892..64138cbd6d 100644 --- a/hw/nvme/subsys.c +++ b/hw/nvme/subsys.c @@ -226,7 +226,6 @@ static void nvme_subsys_class_init(ObjectClass *oc, void *data) dc->realize = nvme_subsys_realize; dc->desc = "Virtual NVMe subsystem"; - dc->hotpluggable = false; device_class_set_props(dc, nvme_subsystem_props); } From f82dd2837c30af394fa5285523f2f517815c83a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lefort?= Date: Thu, 13 Mar 2025 20:30:07 +0100 Subject: [PATCH 027/136] target/riscv: pmp: don't allow RLB to bypass rule privileges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When Smepmp is supported, mseccfg.RLB allows bypassing locks when writing CSRs but should not affect interpretation of actual PMP rules. This is not the case with the current implementation where pmp_hart_has_privs calls pmp_is_locked which implements mseccfg.RLB bypass. This commit implements the correct behavior by removing mseccfg.RLB bypass from pmp_is_locked. RLB bypass when writing CSRs is implemented by adding a new pmp_is_readonly function that calls pmp_is_locked and checks mseccfg.RLB. pmp_write_cfg and pmpaddr_csr_write are changed to use this new function.
Signed-off-by: Loïc Lefort Reviewed-by: Alistair Francis Reviewed-by: Daniel Henrique Barboza Reviewed-by: LIU Zhiwei  Message-ID: <20250313193011.720075-2-loic@rivosinc.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit 4541d205f03cf1529439f68d2ec5056685189399) Signed-off-by: Michael Tokarev --- target/riscv/pmp.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c index b0841d44f4..e1e5ca589e 100644 --- a/target/riscv/pmp.c +++ b/target/riscv/pmp.c @@ -45,11 +45,6 @@ static inline uint8_t pmp_get_a_field(uint8_t cfg) */ static inline int pmp_is_locked(CPURISCVState *env, uint32_t pmp_index) { - /* mseccfg.RLB is set */ - if (MSECCFG_RLB_ISSET(env)) { - return 0; - } - if (env->pmp_state.pmp[pmp_index].cfg_reg & PMP_LOCK) { return 1; } @@ -62,6 +57,15 @@ static inline int pmp_is_locked(CPURISCVState *env, uint32_t pmp_index) return 0; } +/* + * Check whether a PMP is locked for writing or not. + * (i.e. has LOCK flag and mseccfg.RLB is unset) + */ +static int pmp_is_readonly(CPURISCVState *env, uint32_t pmp_index) +{ + return pmp_is_locked(env, pmp_index) && !MSECCFG_RLB_ISSET(env); +} + /* * Count the number of active rules. 
*/ @@ -90,39 +94,38 @@ static inline uint8_t pmp_read_cfg(CPURISCVState *env, uint32_t pmp_index) static bool pmp_write_cfg(CPURISCVState *env, uint32_t pmp_index, uint8_t val) { if (pmp_index < MAX_RISCV_PMPS) { - bool locked = true; + bool readonly = true; if (riscv_cpu_cfg(env)->ext_smepmp) { /* mseccfg.RLB is set */ if (MSECCFG_RLB_ISSET(env)) { - locked = false; + readonly = false; } /* mseccfg.MML is not set */ - if (!MSECCFG_MML_ISSET(env) && !pmp_is_locked(env, pmp_index)) { - locked = false; + if (!MSECCFG_MML_ISSET(env) && !pmp_is_readonly(env, pmp_index)) { + readonly = false; } /* mseccfg.MML is set */ if (MSECCFG_MML_ISSET(env)) { /* not adding execute bit */ if ((val & PMP_LOCK) != 0 && (val & PMP_EXEC) != PMP_EXEC) { - locked = false; + readonly = false; } /* shared region and not adding X bit */ if ((val & PMP_LOCK) != PMP_LOCK && (val & 0x7) != (PMP_WRITE | PMP_EXEC)) { - locked = false; + readonly = false; } } } else { - if (!pmp_is_locked(env, pmp_index)) { - locked = false; - } + readonly = pmp_is_readonly(env, pmp_index); } - if (locked) { - qemu_log_mask(LOG_GUEST_ERROR, "ignoring pmpcfg write - locked\n"); + if (readonly) { + qemu_log_mask(LOG_GUEST_ERROR, + "ignoring pmpcfg write - read only\n"); } else if (env->pmp_state.pmp[pmp_index].cfg_reg != val) { /* If !mseccfg.MML then ignore writes with encoding RW=01 */ if ((val & PMP_WRITE) && !(val & PMP_READ) && @@ -524,14 +527,14 @@ void pmpaddr_csr_write(CPURISCVState *env, uint32_t addr_index, uint8_t pmp_cfg = env->pmp_state.pmp[addr_index + 1].cfg_reg; is_next_cfg_tor = PMP_AMATCH_TOR == pmp_get_a_field(pmp_cfg); - if (pmp_is_locked(env, addr_index + 1) && is_next_cfg_tor) { + if (pmp_is_readonly(env, addr_index + 1) && is_next_cfg_tor) { qemu_log_mask(LOG_GUEST_ERROR, - "ignoring pmpaddr write - pmpcfg + 1 locked\n"); + "ignoring pmpaddr write - pmpcfg+1 read only\n"); return; } } - if (!pmp_is_locked(env, addr_index)) { + if (!pmp_is_readonly(env, addr_index)) { if 
(env->pmp_state.pmp[addr_index].addr_reg != val) { env->pmp_state.pmp[addr_index].addr_reg = val; pmp_update_rule_addr(env, addr_index); @@ -542,7 +545,7 @@ void pmpaddr_csr_write(CPURISCVState *env, uint32_t addr_index, } } else { qemu_log_mask(LOG_GUEST_ERROR, - "ignoring pmpaddr write - locked\n"); + "ignoring pmpaddr write - read only\n"); } } else { qemu_log_mask(LOG_GUEST_ERROR, From 504dcda206464fe30ebf8224fc39a963dadad1e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lefort?= Date: Thu, 13 Mar 2025 20:30:08 +0100 Subject: [PATCH 028/136] target/riscv: pmp: move Smepmp operation conversion into a function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Loïc Lefort Reviewed-by: Daniel Henrique Barboza Reviewed-by: Alistair Francis Reviewed-by: LIU Zhiwei Message-ID: <20250313193011.720075-3-loic@rivosinc.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit 915b203745540e908943758f78f5da49e0a15e45) Signed-off-by: Michael Tokarev --- target/riscv/pmp.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c index e1e5ca589e..7d65dc24a5 100644 --- a/target/riscv/pmp.c +++ b/target/riscv/pmp.c @@ -31,6 +31,15 @@ static bool pmp_write_cfg(CPURISCVState *env, uint32_t addr_index, uint8_t val); static uint8_t pmp_read_cfg(CPURISCVState *env, uint32_t addr_index); +/* + * Convert the PMP permissions to match the truth table in the Smepmp spec. 
+ */ +static inline uint8_t pmp_get_smepmp_operation(uint8_t cfg) +{ + return ((cfg & PMP_LOCK) >> 4) | ((cfg & PMP_READ) << 2) | + (cfg & PMP_WRITE) | ((cfg & PMP_EXEC) >> 2); +} + /* * Accessor method to extract address matching type 'a field' from cfg reg */ @@ -355,16 +364,6 @@ bool pmp_hart_has_privs(CPURISCVState *env, hwaddr addr, const uint8_t a_field = pmp_get_a_field(env->pmp_state.pmp[i].cfg_reg); - /* - * Convert the PMP permissions to match the truth table in the - * Smepmp spec. - */ - const uint8_t smepmp_operation = - ((env->pmp_state.pmp[i].cfg_reg & PMP_LOCK) >> 4) | - ((env->pmp_state.pmp[i].cfg_reg & PMP_READ) << 2) | - (env->pmp_state.pmp[i].cfg_reg & PMP_WRITE) | - ((env->pmp_state.pmp[i].cfg_reg & PMP_EXEC) >> 2); - if (((s + e) == 2) && (PMP_AMATCH_OFF != a_field)) { /* * If the PMP entry is not off and the address is in range, @@ -383,6 +382,9 @@ bool pmp_hart_has_privs(CPURISCVState *env, hwaddr addr, /* * If mseccfg.MML Bit set, do the enhanced pmp priv check */ + const uint8_t smepmp_operation = + pmp_get_smepmp_operation(env->pmp_state.pmp[i].cfg_reg); + if (mode == PRV_M) { switch (smepmp_operation) { case 0: From bc15a8db4ff857172625176b03b03138aa7624d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lefort?= Date: Thu, 13 Mar 2025 20:30:09 +0100 Subject: [PATCH 029/136] target/riscv: pmp: fix checks on writes to pmpcfg in Smepmp MML mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With Machine Mode Lockdown (mseccfg.MML) set and RLB not set, checks on pmpcfg writes would match the wrong cases of Smepmp truth table. The existing code allows writes for the following cases: - L=1, X=0: cases 8, 10, 12, 14 - L=0, RWX!=WX: cases 0-2, 4-6 This leaves cases 3, 7, 9, 11, 13, 15 for which writes are ignored. 
From the Smepmp specification: "Adding a rule with executable privileges that either is M-mode-only or a locked Shared-Region is not possible (...)" This description matches cases 9-11, 13 of the truth table. This commit implements an explicit check for these cases by using pmp_get_smepmp_operation to convert between PMP configuration and Smepmp truth table cases. Signed-off-by: Loïc Lefort Reviewed-by: Daniel Henrique Barboza Reviewed-by: LIU Zhiwei Message-ID: <20250313193011.720075-4-loic@rivosinc.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit 19cf1a7d9e59b71bf8d6571d4747e5c82667c3d1) Signed-off-by: Michael Tokarev --- target/riscv/pmp.c | 79 +++++++++++++++++++++++++--------------------- 1 file changed, 43 insertions(+), 36 deletions(-) diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c index 7d65dc24a5..c5f6cdaccb 100644 --- a/target/riscv/pmp.c +++ b/target/riscv/pmp.c @@ -75,6 +75,44 @@ static int pmp_is_readonly(CPURISCVState *env, uint32_t pmp_index) return pmp_is_locked(env, pmp_index) && !MSECCFG_RLB_ISSET(env); } +/* + * Check whether `val` is an invalid Smepmp config value + */ +static int pmp_is_invalid_smepmp_cfg(CPURISCVState *env, uint8_t val) +{ + /* No check if mseccfg.MML is not set or if mseccfg.RLB is set */ + if (!MSECCFG_MML_ISSET(env) || MSECCFG_RLB_ISSET(env)) { + return 0; + } + + /* + * Adding a rule with executable privileges that either is M-mode-only + * or a locked Shared-Region is not possible + */ + switch (pmp_get_smepmp_operation(val)) { + case 0: + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 8: + case 12: + case 14: + case 15: + return 0; + case 9: + case 10: + case 11: + case 13: + return 1; + default: + g_assert_not_reached(); + } +} + /* * Count the number of active rules.
*/ @@ -103,44 +141,13 @@ static inline uint8_t pmp_read_cfg(CPURISCVState *env, uint32_t pmp_index) static bool pmp_write_cfg(CPURISCVState *env, uint32_t pmp_index, uint8_t val) { if (pmp_index < MAX_RISCV_PMPS) { - bool readonly = true; - - if (riscv_cpu_cfg(env)->ext_smepmp) { - /* mseccfg.RLB is set */ - if (MSECCFG_RLB_ISSET(env)) { - readonly = false; - } - - /* mseccfg.MML is not set */ - if (!MSECCFG_MML_ISSET(env) && !pmp_is_readonly(env, pmp_index)) { - readonly = false; - } - - /* mseccfg.MML is set */ - if (MSECCFG_MML_ISSET(env)) { - /* not adding execute bit */ - if ((val & PMP_LOCK) != 0 && (val & PMP_EXEC) != PMP_EXEC) { - readonly = false; - } - /* shared region and not adding X bit */ - if ((val & PMP_LOCK) != PMP_LOCK && - (val & 0x7) != (PMP_WRITE | PMP_EXEC)) { - readonly = false; - } - } - } else { - readonly = pmp_is_readonly(env, pmp_index); - } - - if (readonly) { + if (pmp_is_readonly(env, pmp_index)) { qemu_log_mask(LOG_GUEST_ERROR, "ignoring pmpcfg write - read only\n"); - } else if (env->pmp_state.pmp[pmp_index].cfg_reg != val) { - /* If !mseccfg.MML then ignore writes with encoding RW=01 */ - if ((val & PMP_WRITE) && !(val & PMP_READ) && - !MSECCFG_MML_ISSET(env)) { - return false; - } + } else if (pmp_is_invalid_smepmp_cfg(env, val)) { + qemu_log_mask(LOG_GUEST_ERROR, + "ignoring pmpcfg write - invalid\n"); + } else { env->pmp_state.pmp[pmp_index].cfg_reg = val; pmp_update_rule_addr(env, pmp_index); return true; From b76d4a5657129785c037a8689804caedbb045333 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 10 Apr 2025 18:17:22 +0200 Subject: [PATCH 030/136] hw/riscv: Fix type conflict of GLib function pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qtest_set_command_cb passed to g_once should match GThreadFunc, which it does not. But using g_once is actually unnecessary, because the function is called by riscv_harts_realize() under the Big QEMU Lock. 
Reported-by: Kohei Tokunaga Signed-off-by: Paolo Bonzini Reviewed-by: Alistair Francis Reviewed-by: Kohei Tokunaga Reviewed-by: Philippe Mathieu-Daudé Message-ID: <20250410161722.595634-1-pbonzini@redhat.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit 56cde18d048e1e1f889e31f7553e1f39f03eeec5) Signed-off-by: Michael Tokarev --- hw/riscv/riscv_hart.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hw/riscv/riscv_hart.c b/hw/riscv/riscv_hart.c index a55d156668..bb9104bae0 100644 --- a/hw/riscv/riscv_hart.c +++ b/hw/riscv/riscv_hart.c @@ -104,8 +104,11 @@ static bool csr_qtest_callback(CharBackend *chr, gchar **words) static void riscv_cpu_register_csr_qtest_callback(void) { - static GOnce once; - g_once(&once, (GThreadFunc)qtest_set_command_cb, csr_qtest_callback); + static bool first = true; + if (first) { + first = false; + qtest_set_command_cb(csr_qtest_callback); + } } #endif From 156321d18ebf86a21c487f995ce077d7c602d456 Mon Sep 17 00:00:00 2001 From: Ziqiao Kong Date: Tue, 15 Apr 2025 16:02:54 +0800 Subject: [PATCH 031/136] target/riscv: fix endless translation loop on big endian systems On big endian systems, pte and updated_pte hold big endian host data while pte_pa points to little endian target data. This means the branch at cpu_helper.c:1669 will always be satisfied and restart translation, causing an endless translation loop. The correctness of this patch can be deduced as follows: old_pte will hold a value either from cpu_to_le32/64(pte) or cpu_to_le32/64(updated_pte), both of which are little endian. After that, an in-place conversion by le32/64_to_cpu(old_pte) ensures that old_pte is now in native endianness, the same as pte. Therefore, the endianness of both sides of if (old_pte != pte) is correct.
Signed-off-by: Ziqiao Kong Reviewed-by: Alistair Francis Reviewed-by: Richard Henderson Message-ID: <20250415080254.3667878-2-ziqiaokong@gmail.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit ad63158bdb33dab5704ea1cf740d2ea0387175df) Signed-off-by: Michael Tokarev --- target/riscv/cpu_helper.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index 6c4391d96b..3233b66e7e 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -1662,9 +1662,11 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, target_ulong *pte_pa = qemu_map_ram_ptr(mr->ram_block, addr1); target_ulong old_pte; if (riscv_cpu_sxl(env) == MXL_RV32) { - old_pte = qatomic_cmpxchg((uint32_t *)pte_pa, pte, updated_pte); + old_pte = qatomic_cmpxchg((uint32_t *)pte_pa, cpu_to_le32(pte), cpu_to_le32(updated_pte)); + old_pte = le32_to_cpu(old_pte); } else { - old_pte = qatomic_cmpxchg(pte_pa, pte, updated_pte); + old_pte = qatomic_cmpxchg(pte_pa, cpu_to_le64(pte), cpu_to_le64(updated_pte)); + old_pte = le64_to_cpu(old_pte); } if (old_pte != pte) { goto restart; From 70dbbc28118a66f40709f197eeea8d6cee380179 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Thu, 17 Apr 2025 15:22:06 +0800 Subject: [PATCH 032/136] common-user/host/riscv: use tail pseudoinstruction for calling tail The j pseudoinstruction maps to a JAL instruction, which can only handle a jump to somewhere with a signed 20-bit destination. In case of static linking and LTO'ing this easily leads to "relocation truncated to fit" error. Switch to use tail pseudoinstruction, which is the standard way to tail-call a function in medium code model (emits AUIPC+JALR). 
Signed-off-by: Icenowy Zheng Reviewed-by: Richard Henderson Message-ID: <20250417072206.364008-1-uwu@icenowy.me> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit 22b448ccc6611a59d4aa54419f4d88c1f343cb35) Signed-off-by: Michael Tokarev --- common-user/host/riscv/safe-syscall.inc.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common-user/host/riscv/safe-syscall.inc.S b/common-user/host/riscv/safe-syscall.inc.S index dfe83c300e..c8b81e33d0 100644 --- a/common-user/host/riscv/safe-syscall.inc.S +++ b/common-user/host/riscv/safe-syscall.inc.S @@ -69,11 +69,11 @@ safe_syscall_end: /* code path setting errno */ 0: neg a0, a0 - j safe_syscall_set_errno_tail + tail safe_syscall_set_errno_tail /* code path when we didn't execute the syscall */ 2: li a0, QEMU_ERESTARTSYS - j safe_syscall_set_errno_tail + tail safe_syscall_set_errno_tail .cfi_endproc .size safe_syscall_base, .-safe_syscall_base From 336fed6bb87565b6b56dfc6de3a673b8def85fb7 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 8 Apr 2025 18:39:29 +0800 Subject: [PATCH 033/136] target/riscv: rvv: Source vector registers cannot overlap mask register Add the relevant ISA paragraphs explaining why source (and destination) registers cannot overlap the mask register. 
Signed-off-by: Anton Blanchard Reviewed-by: Daniel Henrique Barboza Reviewed-by: Max Chou Signed-off-by: Max Chou Message-ID: <20250408103938.3623486-2-max.chou@sifive.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit 3e8d1e4a628bb234c0b5d1ccd510900047181dbd) Signed-off-by: Michael Tokarev --- target/riscv/insn_trans/trans_rvv.c.inc | 29 ++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index b9883a5d32..20b1cb127b 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -100,10 +100,33 @@ static bool require_scale_rvfmin(DisasContext *s) } } -/* Destination vector register group cannot overlap source mask register. */ -static bool require_vm(int vm, int vd) +/* + * Source and destination vector register groups cannot overlap source mask + * register: + * + * A vector register cannot be used to provide source operands with more than + * one EEW for a single instruction. A mask register source is considered to + * have EEW=1 for this constraint. An encoding that would result in the same + * vector register being read with two or more different EEWs, including when + * the vector register appears at different positions within two or more vector + * register groups, is reserved. + * (Section 5.2) + * + * A destination vector register group can overlap a source vector + * register group only if one of the following holds: + * 1. The destination EEW equals the source EEW. + * 2. The destination EEW is smaller than the source EEW and the overlap + * is in the lowest-numbered part of the source register group. + * 3. The destination EEW is greater than the source EEW, the source EMUL + * is at least 1, and the overlap is in the highest-numbered part of + * the destination register group. 
+ * For the purpose of determining register group overlap constraints, mask + * elements have EEW=1. + * (Section 5.2) + */ +static bool require_vm(int vm, int v) { - return (vm != 0 || vd != 0); + return (vm != 0 || v != 0); } static bool require_nf(int vd, int nf, int lmul) From 027ea4a2f5fc559499a95efa8bdccc6e81f91536 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 8 Apr 2025 18:39:30 +0800 Subject: [PATCH 034/136] target/riscv: rvv: Add CHECK arg to GEN_OPFVF_WIDEN_TRANS Signed-off-by: Anton Blanchard Reviewed-by: Daniel Henrique Barboza Reviewed-by: Max Chou Signed-off-by: Max Chou Message-ID: <20250408103938.3623486-3-max.chou@sifive.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit b0450a101d6c88789d0e8df2bcbef61bc7cd159a) Signed-off-by: Michael Tokarev --- target/riscv/insn_trans/trans_rvv.c.inc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 20b1cb127b..e630f8661e 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -2403,10 +2403,10 @@ static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a) } /* OPFVF with WIDEN */ -#define GEN_OPFVF_WIDEN_TRANS(NAME) \ +#define GEN_OPFVF_WIDEN_TRANS(NAME, CHECK) \ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ { \ - if (opfvf_widen_check(s, a)) { \ + if (CHECK(s, a)) { \ uint32_t data = 0; \ static gen_helper_opfvf *const fns[2] = { \ gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ @@ -2422,8 +2422,8 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ return false; \ } -GEN_OPFVF_WIDEN_TRANS(vfwadd_vf) -GEN_OPFVF_WIDEN_TRANS(vfwsub_vf) +GEN_OPFVF_WIDEN_TRANS(vfwadd_vf, opfvf_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwsub_vf, opfvf_widen_check) static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a) { @@ -2505,7 +2505,7 @@ GEN_OPFVF_TRANS(vfrdiv_vf, opfvf_check) /* Vector 
Widening Floating-Point Multiply */ GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check) -GEN_OPFVF_WIDEN_TRANS(vfwmul_vf) +GEN_OPFVF_WIDEN_TRANS(vfwmul_vf, opfvf_widen_check) /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ GEN_OPFVV_TRANS(vfmacc_vv, opfvv_check) @@ -2530,10 +2530,10 @@ GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_widen_check) GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_widen_check) GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_widen_check) GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_widen_check) -GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf) -GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf) -GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf) -GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf) +GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf, opfvf_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf, opfvf_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf, opfvf_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf, opfvf_widen_check) /* Vector Floating-Point Square-Root Instruction */ From bc502d4de975aa6faf4dc2f026b73e4a159ff30d Mon Sep 17 00:00:00 2001 From: Max Chou Date: Tue, 8 Apr 2025 18:39:31 +0800 Subject: [PATCH 035/136] target/riscv: rvv: Apply vext_check_input_eew to vrgather instructions to check mismatched input EEWs encoding constraint According to the v spec, a vector register cannot be used to provide source operands with more than one EEW for a single instruction. The vs1 EEW of vrgatherei16.vv is 16. 
Co-authored-by: Anton Blanchard Reviewed-by: Daniel Henrique Barboza Signed-off-by: Max Chou Message-ID: <20250408103938.3623486-4-max.chou@sifive.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit 629c2a8dd7506e1cb9b6b7127604641632ac453f) Signed-off-by: Michael Tokarev --- target/riscv/insn_trans/trans_rvv.c.inc | 32 +++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index e630f8661e..4a0c9fbeff 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -379,6 +379,35 @@ static bool vext_check_ld_index(DisasContext *s, int vd, int vs2, return ret; } +/* + * Check whether a vector register is used to provide source operands with + * more than one EEW for the vector instruction. + * Returns true if the instruction has valid encoding + * Returns false if encoding violates the mismatched input EEWs constraint + */ +static bool vext_check_input_eew(DisasContext *s, int vs1, uint8_t eew_vs1, + int vs2, uint8_t eew_vs2, int vm) +{ + bool is_valid = true; + int8_t emul_vs1 = eew_vs1 - s->sew + s->lmul; + int8_t emul_vs2 = eew_vs2 - s->sew + s->lmul; + + /* When vm is 0, vs1 & vs2(EEW!=1) group can't overlap v0 (EEW=1) */ + if ((vs1 != -1 && !require_vm(vm, vs1)) || + (vs2 != -1 && !require_vm(vm, vs2))) { + is_valid = false; + } + + /* When eew_vs1 != eew_vs2, check whether vs1 and vs2 are overlapped */ + if ((vs1 != -1 && vs2 != -1) && (eew_vs1 != eew_vs2) && + is_overlapped(vs1, 1 << MAX(emul_vs1, 0), + vs2, 1 << MAX(emul_vs2, 0))) { + is_valid = false; + } + + return is_valid; +} + static bool vext_check_ss(DisasContext *s, int vd, int vs, int vm) { return require_vm(vm, vd) && @@ -3449,6 +3478,7 @@ static bool vrgather_vv_check(DisasContext *s, arg_rmrr *a) { return require_rvv(s) && vext_check_isa_ill(s) && + vext_check_input_eew(s, a->rs1, s->sew, a->rs2, s->sew, a->vm) && 
require_align(a->rd, s->lmul) && require_align(a->rs1, s->lmul) && require_align(a->rs2, s->lmul) && @@ -3461,6 +3491,7 @@ static bool vrgatherei16_vv_check(DisasContext *s, arg_rmrr *a) int8_t emul = MO_16 - s->sew + s->lmul; return require_rvv(s) && vext_check_isa_ill(s) && + vext_check_input_eew(s, a->rs1, MO_16, a->rs2, s->sew, a->vm) && (emul >= -3 && emul <= 3) && require_align(a->rd, s->lmul) && require_align(a->rs1, emul) && @@ -3480,6 +3511,7 @@ static bool vrgather_vx_check(DisasContext *s, arg_rmrr *a) { return require_rvv(s) && vext_check_isa_ill(s) && + vext_check_input_eew(s, -1, MO_64, a->rs2, s->sew, a->vm) && require_align(a->rd, s->lmul) && require_align(a->rs2, s->lmul) && (a->rd != a->rs2) && From 0bfd8dd14b1f8211a010f48a15dda90876a0e82a Mon Sep 17 00:00:00 2001 From: Max Chou Date: Tue, 8 Apr 2025 18:39:32 +0800 Subject: [PATCH 036/136] target/riscv: rvv: Apply vext_check_input_eew to OPIVI/OPIVX/OPFVF(vext_check_ss) instructions Handle the overlap of source registers with different EEWs. 
Co-authored-by: Anton Blanchard Reviewed-by: Daniel Henrique Barboza Signed-off-by: Max Chou Message-ID: <20250408103938.3623486-5-max.chou@sifive.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit fbeaf35838768086b435833cb4dc5182c73ec2bc) Signed-off-by: Michael Tokarev --- target/riscv/insn_trans/trans_rvv.c.inc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 4a0c9fbeff..e8197f779e 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -412,7 +412,8 @@ static bool vext_check_ss(DisasContext *s, int vd, int vs, int vm) { return require_vm(vm, vd) && require_align(vd, s->lmul) && - require_align(vs, s->lmul); + require_align(vs, s->lmul) && + vext_check_input_eew(s, vs, s->sew, -1, s->sew, vm); } /* From 2a0eb3c2a104dec5826b3f763814d78a14274306 Mon Sep 17 00:00:00 2001 From: Max Chou Date: Tue, 8 Apr 2025 18:39:33 +0800 Subject: [PATCH 037/136] target/riscv: rvv: Apply vext_check_input_eew to OPIVV/OPFVV(vext_check_sss) instructions Handle the overlap of source registers with different EEWs. 
Co-authored-by: Anton Blanchard Reviewed-by: Daniel Henrique Barboza Signed-off-by: Max Chou Message-ID: <20250408103938.3623486-6-max.chou@sifive.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit fda68acb7761af40df78db18e44ca1ff20195fe0) Signed-off-by: Michael Tokarev --- target/riscv/insn_trans/trans_rvv.c.inc | 1 + 1 file changed, 1 insertion(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index e8197f779e..2a4bededd1 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -432,6 +432,7 @@ static bool vext_check_ss(DisasContext *s, int vd, int vs, int vm) static bool vext_check_sss(DisasContext *s, int vd, int vs1, int vs2, int vm) { return vext_check_ss(s, vd, vs2, vm) && + vext_check_input_eew(s, vs1, s->sew, vs2, s->sew, vm) && require_align(vs1, s->lmul); } From 19d107468e394f9394a69101e2bd01f8b8f6448a Mon Sep 17 00:00:00 2001 From: Max Chou Date: Tue, 8 Apr 2025 18:39:34 +0800 Subject: [PATCH 038/136] target/riscv: rvv: Apply vext_check_input_eew to vector slide instructions(OPIVI/OPIVX) Handle the overlap of source registers with different EEWs. 
Co-authored-by: Anton Blanchard Reviewed-by: Daniel Henrique Barboza Signed-off-by: Max Chou Message-ID: <20250408103938.3623486-7-max.chou@sifive.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit b5480a693e3e657108746721ffe434b3bb6e7a72) Signed-off-by: Michael Tokarev --- target/riscv/insn_trans/trans_rvv.c.inc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 2a4bededd1..d72792e46a 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -638,7 +638,9 @@ static bool vext_check_slide(DisasContext *s, int vd, int vs2, { bool ret = require_align(vs2, s->lmul) && require_align(vd, s->lmul) && - require_vm(vm, vd); + require_vm(vm, vd) && + vext_check_input_eew(s, -1, 0, vs2, s->sew, vm); + if (is_over) { ret &= (vd != vs2); } From faaeaa955c6209814a0e912c75cb80e085c2a321 Mon Sep 17 00:00:00 2001 From: Max Chou Date: Tue, 8 Apr 2025 18:39:35 +0800 Subject: [PATCH 039/136] target/riscv: rvv: Apply vext_check_input_eew to vector integer extension instructions(OPMVV) Handle the overlap of source registers with different EEWs. 
Co-authored-by: Anton Blanchard Reviewed-by: Daniel Henrique Barboza Signed-off-by: Max Chou Message-ID: <20250408103938.3623486-8-max.chou@sifive.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit 411eefd56a3921ddbfdbadca596e1a8593ce834c) Signed-off-by: Michael Tokarev --- target/riscv/insn_trans/trans_rvv.c.inc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index d72792e46a..585ee98b27 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -3659,7 +3659,9 @@ static bool int_ext_check(DisasContext *s, arg_rmr *a, uint8_t div) require_align(a->rd, s->lmul) && require_align(a->rs2, s->lmul - div) && require_vm(a->vm, a->rd) && - require_noover(a->rd, s->lmul, a->rs2, s->lmul - div); + require_noover(a->rd, s->lmul, a->rs2, s->lmul - div) && + vext_check_input_eew(s, -1, 0, a->rs2, s->sew, a->vm); + return ret; } From be6e117a8363734bb65f80299cbdcbe602b71f8e Mon Sep 17 00:00:00 2001 From: Max Chou Date: Tue, 8 Apr 2025 18:39:36 +0800 Subject: [PATCH 040/136] target/riscv: rvv: Apply vext_check_input_eew to vector narrow/widen instructions Handle the overlap of source registers with different EEWs. The vd of vector widening mul-add instructions is one of the input operands. 
Co-authored-by: Anton Blanchard Reviewed-by: Daniel Henrique Barboza Signed-off-by: Max Chou Message-ID: <20250408103938.3623486-9-max.chou@sifive.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit 1f090a229f85e662394267680408bd31fd0a99c9) Signed-off-by: Michael Tokarev --- target/riscv/insn_trans/trans_rvbf16.c.inc | 9 ++- target/riscv/insn_trans/trans_rvv.c.inc | 77 +++++++++++++++++----- 2 files changed, 68 insertions(+), 18 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvbf16.c.inc b/target/riscv/insn_trans/trans_rvbf16.c.inc index 0a9cd1ec31..066dc364c5 100644 --- a/target/riscv/insn_trans/trans_rvbf16.c.inc +++ b/target/riscv/insn_trans/trans_rvbf16.c.inc @@ -119,8 +119,11 @@ static bool trans_vfwmaccbf16_vv(DisasContext *ctx, arg_vfwmaccbf16_vv *a) REQUIRE_FPU; REQUIRE_ZVFBFWMA(ctx); + uint8_t sew = ctx->sew; if (require_rvv(ctx) && vext_check_isa_ill(ctx) && (ctx->sew == MO_16) && - vext_check_dss(ctx, a->rd, a->rs1, a->rs2, a->vm)) { + vext_check_dss(ctx, a->rd, a->rs1, a->rs2, a->vm) && + vext_check_input_eew(ctx, a->rd, sew + 1, a->rs1, sew, a->vm) && + vext_check_input_eew(ctx, a->rd, sew + 1, a->rs2, sew, a->vm)) { uint32_t data = 0; gen_set_rm_chkfrm(ctx, RISCV_FRM_DYN); @@ -146,8 +149,10 @@ static bool trans_vfwmaccbf16_vf(DisasContext *ctx, arg_vfwmaccbf16_vf *a) REQUIRE_FPU; REQUIRE_ZVFBFWMA(ctx); + uint8_t sew = ctx->sew; if (require_rvv(ctx) && (ctx->sew == MO_16) && vext_check_isa_ill(ctx) && - vext_check_ds(ctx, a->rd, a->rs2, a->vm)) { + vext_check_ds(ctx, a->rd, a->rs2, a->vm) && + vext_check_input_eew(ctx, a->rd, sew + 1, a->rs2, sew, a->vm)) { uint32_t data = 0; gen_set_rm(ctx, RISCV_FRM_DYN); diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 585ee98b27..2d067a59e2 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -528,6 +528,7 @@ static bool vext_narrow_check_common(DisasContext *s, int 
vd, int vs2, static bool vext_check_ds(DisasContext *s, int vd, int vs, int vm) { return vext_wide_check_common(s, vd, vm) && + vext_check_input_eew(s, vs, s->sew, -1, 0, vm) && require_align(vs, s->lmul) && require_noover(vd, s->lmul + 1, vs, s->lmul); } @@ -535,6 +536,7 @@ static bool vext_check_ds(DisasContext *s, int vd, int vs, int vm) static bool vext_check_dd(DisasContext *s, int vd, int vs, int vm) { return vext_wide_check_common(s, vd, vm) && + vext_check_input_eew(s, vs, s->sew + 1, -1, 0, vm) && require_align(vs, s->lmul + 1); } @@ -553,6 +555,7 @@ static bool vext_check_dd(DisasContext *s, int vd, int vs, int vm) static bool vext_check_dss(DisasContext *s, int vd, int vs1, int vs2, int vm) { return vext_check_ds(s, vd, vs2, vm) && + vext_check_input_eew(s, vs1, s->sew, vs2, s->sew, vm) && require_align(vs1, s->lmul) && require_noover(vd, s->lmul + 1, vs1, s->lmul); } @@ -575,12 +578,14 @@ static bool vext_check_dss(DisasContext *s, int vd, int vs1, int vs2, int vm) static bool vext_check_dds(DisasContext *s, int vd, int vs1, int vs2, int vm) { return vext_check_ds(s, vd, vs1, vm) && + vext_check_input_eew(s, vs1, s->sew, vs2, s->sew + 1, vm) && require_align(vs2, s->lmul + 1); } static bool vext_check_sd(DisasContext *s, int vd, int vs, int vm) { - bool ret = vext_narrow_check_common(s, vd, vs, vm); + bool ret = vext_narrow_check_common(s, vd, vs, vm) && + vext_check_input_eew(s, vs, s->sew + 1, -1, 0, vm); if (vd != vs) { ret &= require_noover(vd, s->lmul, vs, s->lmul + 1); } @@ -603,6 +608,7 @@ static bool vext_check_sd(DisasContext *s, int vd, int vs, int vm) static bool vext_check_sds(DisasContext *s, int vd, int vs1, int vs2, int vm) { return vext_check_sd(s, vd, vs2, vm) && + vext_check_input_eew(s, vs1, s->sew, vs2, s->sew + 1, vm) && require_align(vs1, s->lmul); } @@ -1531,6 +1537,16 @@ static bool opivv_widen_check(DisasContext *s, arg_rmrr *a) vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm); } +/* OPIVV with overwrite and WIDEN */ +static bool 
opivv_overwrite_widen_check(DisasContext *s, arg_rmrr *a) +{ + return require_rvv(s) && + vext_check_isa_ill(s) && + vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm) && + vext_check_input_eew(s, a->rd, s->sew + 1, a->rs1, s->sew, a->vm) && + vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm); +} + static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, gen_helper_gvec_4_ptr *fn, bool (*checkfn)(DisasContext *, arg_rmrr *)) @@ -1578,6 +1594,14 @@ static bool opivx_widen_check(DisasContext *s, arg_rmrr *a) vext_check_ds(s, a->rd, a->rs2, a->vm); } +static bool opivx_overwrite_widen_check(DisasContext *s, arg_rmrr *a) +{ + return require_rvv(s) && + vext_check_isa_ill(s) && + vext_check_ds(s, a->rd, a->rs2, a->vm) && + vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm); +} + #define GEN_OPIVX_WIDEN_TRANS(NAME, CHECK) \ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ { \ @@ -2049,13 +2073,13 @@ GEN_OPIVX_TRANS(vmadd_vx, opivx_check) GEN_OPIVX_TRANS(vnmsub_vx, opivx_check) /* Vector Widening Integer Multiply-Add Instructions */ -GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check) -GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check) -GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_widen_check) -GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_widen_check) -GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_widen_check) -GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_widen_check) -GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_overwrite_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_overwrite_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_overwrite_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_overwrite_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_overwrite_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_overwrite_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_overwrite_widen_check) /* Vector Integer Merge and Move Instructions */ static bool trans_vmv_v_v(DisasContext *s, 
arg_vmv_v_v *a) @@ -2396,6 +2420,17 @@ static bool opfvv_widen_check(DisasContext *s, arg_rmrr *a) vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm); } +static bool opfvv_overwrite_widen_check(DisasContext *s, arg_rmrr *a) +{ + return require_rvv(s) && + require_rvf(s) && + require_scale_rvf(s) && + vext_check_isa_ill(s) && + vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm) && + vext_check_input_eew(s, a->rd, s->sew + 1, a->rs1, s->sew, a->vm) && + vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm); +} + /* OPFVV with WIDEN */ #define GEN_OPFVV_WIDEN_TRANS(NAME, CHECK) \ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ @@ -2435,6 +2470,16 @@ static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a) vext_check_ds(s, a->rd, a->rs2, a->vm); } +static bool opfvf_overwrite_widen_check(DisasContext *s, arg_rmrr *a) +{ + return require_rvv(s) && + require_rvf(s) && + require_scale_rvf(s) && + vext_check_isa_ill(s) && + vext_check_ds(s, a->rd, a->rs2, a->vm) && + vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm); +} + /* OPFVF with WIDEN */ #define GEN_OPFVF_WIDEN_TRANS(NAME, CHECK) \ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ @@ -2559,14 +2604,14 @@ GEN_OPFVF_TRANS(vfmsub_vf, opfvf_check) GEN_OPFVF_TRANS(vfnmsub_vf, opfvf_check) /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ -GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_widen_check) -GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_widen_check) -GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_widen_check) -GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_widen_check) -GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf, opfvf_widen_check) -GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf, opfvf_widen_check) -GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf, opfvf_widen_check) -GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf, opfvf_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_overwrite_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_overwrite_widen_check) +GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_overwrite_widen_check) 
+GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_overwrite_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf, opfvf_overwrite_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf, opfvf_overwrite_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf, opfvf_overwrite_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf, opfvf_overwrite_widen_check) /* Vector Floating-Point Square-Root Instruction */ From 43a4f232fa8d8cab2c748112bba363e92da1ea37 Mon Sep 17 00:00:00 2001 From: Max Chou Date: Tue, 8 Apr 2025 18:39:37 +0800 Subject: [PATCH 041/136] target/riscv: rvv: Apply vext_check_input_eew to vector indexed load/store instructions Handle the overlap of source registers with different EEWs. Co-authored-by: Anton Blanchard Reviewed-by: Daniel Henrique Barboza Signed-off-by: Max Chou Message-ID: <20250408103938.3623486-10-max.chou@sifive.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit db21c3eb05504c4cedaad4f7b19e588361b02385) Signed-off-by: Michael Tokarev --- target/riscv/insn_trans/trans_rvv.c.inc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 2d067a59e2..445a0b72a5 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -1043,7 +1043,8 @@ static bool ld_index_check(DisasContext *s, arg_rnfvm* a, uint8_t eew) { return require_rvv(s) && vext_check_isa_ill(s) && - vext_check_ld_index(s, a->rd, a->rs2, a->nf, a->vm, eew); + vext_check_ld_index(s, a->rd, a->rs2, a->nf, a->vm, eew) && + vext_check_input_eew(s, -1, 0, a->rs2, eew, a->vm); } GEN_VEXT_TRANS(vlxei8_v, MO_8, rnfvm, ld_index_op, ld_index_check) @@ -1095,7 +1096,8 @@ static bool st_index_check(DisasContext *s, arg_rnfvm* a, uint8_t eew) { return require_rvv(s) && vext_check_isa_ill(s) && - vext_check_st_index(s, a->rd, a->rs2, a->nf, eew); + vext_check_st_index(s, a->rd, a->rs2, a->nf, eew) && + vext_check_input_eew(s, a->rd, s->sew, 
a->rs2, eew, a->vm); } GEN_VEXT_TRANS(vsxei8_v, MO_8, rnfvm, st_index_op, st_index_check) From 7b217815625381bca49067ac6d12c10556a76e11 Mon Sep 17 00:00:00 2001 From: Max Chou Date: Tue, 8 Apr 2025 18:39:38 +0800 Subject: [PATCH 042/136] target/riscv: Fix the rvv reserved encoding of unmasked instructions According to the v spec, the encodings of vcompress.vm and vector mask-register logical instructions with vm=0 are reserved. Reviewed-by: Daniel Henrique Barboza Signed-off-by: Max Chou Message-ID: <20250408103938.3623486-11-max.chou@sifive.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit 8539a1244bf240d28917effb88a140eb58e45e88) Signed-off-by: Michael Tokarev --- target/riscv/insn32.decode | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index 6d1a13c826..cd23b1f3a9 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -703,14 +703,14 @@ vfredmax_vs 000111 . ..... ..... 001 ..... 1010111 @r_vm # Vector widening ordered and unordered float reduction sum vfwredusum_vs 110001 . ..... ..... 001 ..... 1010111 @r_vm vfwredosum_vs 110011 . ..... ..... 001 ..... 1010111 @r_vm -vmand_mm 011001 - ..... ..... 010 ..... 1010111 @r -vmnand_mm 011101 - ..... ..... 010 ..... 1010111 @r -vmandn_mm 011000 - ..... ..... 010 ..... 1010111 @r -vmxor_mm 011011 - ..... ..... 010 ..... 1010111 @r -vmor_mm 011010 - ..... ..... 010 ..... 1010111 @r -vmnor_mm 011110 - ..... ..... 010 ..... 1010111 @r -vmorn_mm 011100 - ..... ..... 010 ..... 1010111 @r -vmxnor_mm 011111 - ..... ..... 010 ..... 1010111 @r +vmand_mm 011001 1 ..... ..... 010 ..... 1010111 @r +vmnand_mm 011101 1 ..... ..... 010 ..... 1010111 @r +vmandn_mm 011000 1 ..... ..... 010 ..... 1010111 @r +vmxor_mm 011011 1 ..... ..... 010 ..... 1010111 @r +vmor_mm 011010 1 ..... ..... 010 ..... 1010111 @r +vmnor_mm 011110 1 ..... ..... 010 ..... 1010111 @r +vmorn_mm 011100 1 .....
..... 010 ..... 1010111 @r +vmxnor_mm 011111 1 ..... ..... 010 ..... 1010111 @r vcpop_m 010000 . ..... 10000 010 ..... 1010111 @r2_vm vfirst_m 010000 . ..... 10001 010 ..... 1010111 @r2_vm vmsbf_m 010100 . ..... 00001 010 ..... 1010111 @r2_vm @@ -732,7 +732,7 @@ vrgather_vv 001100 . ..... ..... 000 ..... 1010111 @r_vm vrgatherei16_vv 001110 . ..... ..... 000 ..... 1010111 @r_vm vrgather_vx 001100 . ..... ..... 100 ..... 1010111 @r_vm vrgather_vi 001100 . ..... ..... 011 ..... 1010111 @r_vm -vcompress_vm 010111 - ..... ..... 010 ..... 1010111 @r +vcompress_vm 010111 1 ..... ..... 010 ..... 1010111 @r vmv1r_v 100111 1 ..... 00000 011 ..... 1010111 @r2rd vmv2r_v 100111 1 ..... 00001 011 ..... 1010111 @r2rd vmv4r_v 100111 1 ..... 00011 011 ..... 1010111 @r2rd From 95c0e6e06516ec4bd552a272663f493c6faf5151 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 14 Apr 2025 21:30:06 +0000 Subject: [PATCH 043/136] target/riscv: Fix vslidedown with rvv_ta_all_1s vslidedown always zeroes elements past vl, where it should use the tail policy. 
Signed-off-by: Anton Blanchard Reviewed-by: Alistair Francis Message-ID: <20250414213006.3509058-1-antonb@tenstorrent.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit 2669b696e243b64f8ea1a6468dcee255de99f08d) Signed-off-by: Michael Tokarev --- target/riscv/vector_helper.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 67b3bafebb..1012d38c8a 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -5113,9 +5113,11 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ } \ \ for (i = i_max; i < vl; ++i) { \ - if (vm || vext_elem_mask(v0, i)) { \ - *((ETYPE *)vd + H(i)) = 0; \ + if (!vm && !vext_elem_mask(v0, i)) { \ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ + continue; \ } \ + *((ETYPE *)vd + H(i)) = 0; \ } \ \ env->vstart = 0; \ From c5008143177703ae6d4d5786d0587f0ee9f77b27 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Tue, 29 Apr 2025 09:44:13 -0300 Subject: [PATCH 044/136] target/riscv/kvm: minor fixes/tweaks Remove an unused 'KVMScratchCPU' pointer argument in kvm_riscv_check_sbi_dbcn_support(). Put kvm_riscv_reset_regs_csr() after kvm_riscv_put_regs_csr(). This will make a future patch diff easier to read, when changes in kvm_riscv_reset_regs_csr() and kvm_riscv_get_regs_csr() will be made. 
Fixes: a6b53378f5 ("target/riscv/kvm: implement SBI debug console (DBCN) calls") Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Reviewed-by: Alistair Francis Message-ID: <20250429124421.223883-2-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit 73f81da0a3628180409a0ae90ece19534bcdf09b) Signed-off-by: Michael Tokarev --- target/riscv/kvm/kvm-cpu.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index 0f4997a918..afe3d3e609 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -613,19 +613,6 @@ static int kvm_riscv_put_regs_core(CPUState *cs) return ret; } -static void kvm_riscv_reset_regs_csr(CPURISCVState *env) -{ - env->mstatus = 0; - env->mie = 0; - env->stvec = 0; - env->sscratch = 0; - env->sepc = 0; - env->scause = 0; - env->stval = 0; - env->mip = 0; - env->satp = 0; -} - static int kvm_riscv_get_regs_csr(CPUState *cs) { CPURISCVState *env = &RISCV_CPU(cs)->env; @@ -660,6 +647,19 @@ static int kvm_riscv_put_regs_csr(CPUState *cs) return 0; } +static void kvm_riscv_reset_regs_csr(CPURISCVState *env) +{ + env->mstatus = 0; + env->mie = 0; + env->stvec = 0; + env->sscratch = 0; + env->sepc = 0; + env->scause = 0; + env->stval = 0; + env->mip = 0; + env->satp = 0; +} + static int kvm_riscv_get_regs_fp(CPUState *cs) { int ret = 0; @@ -1078,7 +1078,6 @@ static int uint64_cmp(const void *a, const void *b) } static void kvm_riscv_check_sbi_dbcn_support(RISCVCPU *cpu, - KVMScratchCPU *kvmcpu, struct kvm_reg_list *reglist) { struct kvm_reg_list *reg_search; @@ -1197,7 +1196,7 @@ static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) kvm_riscv_read_vlenb(cpu, kvmcpu, reglist); } - kvm_riscv_check_sbi_dbcn_support(cpu, kvmcpu, reglist); + kvm_riscv_check_sbi_dbcn_support(cpu, reglist); } static void riscv_init_kvm_registers(Object *cpu_obj) 
From 04b855727fac1841171c28672e269f980f45de70 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Tue, 29 Apr 2025 09:44:14 -0300 Subject: [PATCH 045/136] target/riscv/kvm: fix leak in kvm_riscv_init_multiext_cfg() 'reglist' is being g-malloc'ed but never freed. Reported-by: Andrew Jones Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Reviewed-by: Alistair Francis Message-ID: <20250429124421.223883-3-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit 906af6de9462c5192547cca0beac2c134659a437) Signed-off-by: Michael Tokarev --- target/riscv/kvm/kvm-cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index afe3d3e609..616360bd04 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -1119,10 +1119,10 @@ static void kvm_riscv_read_vlenb(RISCVCPU *cpu, KVMScratchCPU *kvmcpu, static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) { + g_autofree struct kvm_reg_list *reglist = NULL; KVMCPUConfig *multi_ext_cfg; struct kvm_one_reg reg; struct kvm_reg_list rl_struct; - struct kvm_reg_list *reglist; uint64_t val, reg_id, *reg_search; int i, ret; From ea8eb871aba3a22ef4cbb2bacd3021e88da43434 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Tue, 29 Apr 2025 09:44:15 -0300 Subject: [PATCH 046/136] target/riscv/kvm: turn u32/u64 reg functions into macros This change is motivated by a future change w.r.t. CSR management. We want to handle them the same way as KVM extensions, i.e. a static array with KVMCPUConfig objs that will be read/written during init and so on. But to do that properly we must be able to declare a static array that holds KVM regs. C does not allow function calls in the initializers of static arrays, e.g. we can't do: .kvm_reg_id = kvm_riscv_reg_id_ulong(...) when instantiating the array.
We can do that with macros though, so our goal is to turn kvm_riscv_reg_id_ulong() into a macro. It is cleaner to turn every other reg_id_*() function into a macro as well, and ulong will end up using the macros for u32 and u64, so we'll start with them. Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Reviewed-by: Alistair Francis Message-ID: <20250429124421.223883-4-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit b6096103494506514d9bfa442f62fef36ffc8fba) Signed-off-by: Michael Tokarev --- target/riscv/kvm/kvm-cpu.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index 616360bd04..991adbaf74 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -58,6 +58,12 @@ void riscv_kvm_aplic_request(void *opaque, int irq, int level) static bool cap_has_mp_state; +#define KVM_RISCV_REG_ID_U32(type, idx) (KVM_REG_RISCV | KVM_REG_SIZE_U32 | \ + type | idx) + +#define KVM_RISCV_REG_ID_U64(type, idx) (KVM_REG_RISCV | KVM_REG_SIZE_U64 | \ + type | idx) + static uint64_t kvm_riscv_reg_id_ulong(CPURISCVState *env, uint64_t type, uint64_t idx) { @@ -76,16 +82,6 @@ static uint64_t kvm_riscv_reg_id_ulong(CPURISCVState *env, uint64_t type, return id; } -static uint64_t kvm_riscv_reg_id_u32(uint64_t type, uint64_t idx) -{ - return KVM_REG_RISCV | KVM_REG_SIZE_U32 | type | idx; -} - -static uint64_t kvm_riscv_reg_id_u64(uint64_t type, uint64_t idx) -{ - return KVM_REG_RISCV | KVM_REG_SIZE_U64 | type | idx; -} - static uint64_t kvm_encode_reg_size_id(uint64_t id, size_t size_b) { uint64_t size_ctz = __builtin_ctz(size_b); @@ -119,12 +115,12 @@ static uint64_t kvm_riscv_vector_reg_id(RISCVCPU *cpu, kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CONFIG, \ KVM_REG_RISCV_CONFIG_REG(name)) -#define RISCV_TIMER_REG(name) kvm_riscv_reg_id_u64(KVM_REG_RISCV_TIMER, \ +#define RISCV_TIMER_REG(name)
KVM_RISCV_REG_ID_U64(KVM_REG_RISCV_TIMER, \ KVM_REG_RISCV_TIMER_REG(name)) -#define RISCV_FP_F_REG(idx) kvm_riscv_reg_id_u32(KVM_REG_RISCV_FP_F, idx) +#define RISCV_FP_F_REG(idx) KVM_RISCV_REG_ID_U32(KVM_REG_RISCV_FP_F, idx) -#define RISCV_FP_D_REG(idx) kvm_riscv_reg_id_u64(KVM_REG_RISCV_FP_D, idx) +#define RISCV_FP_D_REG(idx) KVM_RISCV_REG_ID_U64(KVM_REG_RISCV_FP_D, idx) #define RISCV_VECTOR_CSR_REG(env, name) \ kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_VECTOR, \ From 90f7e23cb7baa6f836723c3ae44039e46fee2a6d Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Tue, 29 Apr 2025 09:44:16 -0300 Subject: [PATCH 047/136] target/riscv/kvm: turn kvm_riscv_reg_id_ulong() into a macro We need the reg_id_ulong() helper to be a macro to be able to create a static array of KVMCPUConfig that will hold CSR information. Despite the amount of changes all of them are tedious/trivial: - replace instances of "kvm_riscv_reg_id_ulong" with "KVM_RISCV_REG_ID_ULONG"; - RISCV_CORE_REG(), RISCV_CSR_REG(), RISCV_CONFIG_REG() and RISCV_VECTOR_CSR_REG() only receives one 'name' arg. Remove unneeded 'env' variables when applicable. 
Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Reviewed-by: Alistair Francis Message-ID: <20250429124421.223883-5-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit 11766e17616a5a4181d4a63f88adf67ac52c553b) Signed-off-by: Michael Tokarev --- target/riscv/kvm/kvm-cpu.c | 99 ++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 58 deletions(-) diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index 991adbaf74..1afc4b729e 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -64,23 +64,11 @@ static bool cap_has_mp_state; #define KVM_RISCV_REG_ID_U64(type, idx) (KVM_REG_RISCV | KVM_REG_SIZE_U64 | \ type | idx) -static uint64_t kvm_riscv_reg_id_ulong(CPURISCVState *env, uint64_t type, - uint64_t idx) -{ - uint64_t id = KVM_REG_RISCV | type | idx; - - switch (riscv_cpu_mxl(env)) { - case MXL_RV32: - id |= KVM_REG_SIZE_U32; - break; - case MXL_RV64: - id |= KVM_REG_SIZE_U64; - break; - default: - g_assert_not_reached(); - } - return id; -} +#if defined(TARGET_RISCV64) +#define KVM_RISCV_REG_ID_ULONG(type, idx) KVM_RISCV_REG_ID_U64(type, idx) +#else +#define KVM_RISCV_REG_ID_ULONG(type, idx) KVM_RISCV_REG_ID_U32(type, idx) +#endif static uint64_t kvm_encode_reg_size_id(uint64_t id, size_t size_b) { @@ -103,16 +91,16 @@ static uint64_t kvm_riscv_vector_reg_id(RISCVCPU *cpu, return kvm_encode_reg_size_id(id, size_b); } -#define RISCV_CORE_REG(env, name) \ - kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CORE, \ +#define RISCV_CORE_REG(name) \ + KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_CORE, \ KVM_REG_RISCV_CORE_REG(name)) -#define RISCV_CSR_REG(env, name) \ - kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CSR, \ +#define RISCV_CSR_REG(name) \ + KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_CSR, \ KVM_REG_RISCV_CSR_REG(name)) -#define RISCV_CONFIG_REG(env, name) \ - kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CONFIG, \ +#define RISCV_CONFIG_REG(name) \ + 
KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_CONFIG, \ KVM_REG_RISCV_CONFIG_REG(name)) #define RISCV_TIMER_REG(name) KVM_RISCV_REG_ID_U64(KVM_REG_RISCV_TIMER, \ @@ -122,13 +110,13 @@ static uint64_t kvm_riscv_vector_reg_id(RISCVCPU *cpu, #define RISCV_FP_D_REG(idx) KVM_RISCV_REG_ID_U64(KVM_REG_RISCV_FP_D, idx) -#define RISCV_VECTOR_CSR_REG(env, name) \ - kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_VECTOR, \ +#define RISCV_VECTOR_CSR_REG(name) \ + KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_VECTOR, \ KVM_REG_RISCV_VECTOR_CSR_REG(name)) #define KVM_RISCV_GET_CSR(cs, env, csr, reg) \ do { \ - int _ret = kvm_get_one_reg(cs, RISCV_CSR_REG(env, csr), ®); \ + int _ret = kvm_get_one_reg(cs, RISCV_CSR_REG(csr), ®); \ if (_ret) { \ return _ret; \ } \ @@ -136,7 +124,7 @@ static uint64_t kvm_riscv_vector_reg_id(RISCVCPU *cpu, #define KVM_RISCV_SET_CSR(cs, env, csr, reg) \ do { \ - int _ret = kvm_set_one_reg(cs, RISCV_CSR_REG(env, csr), ®); \ + int _ret = kvm_set_one_reg(cs, RISCV_CSR_REG(csr), ®); \ if (_ret) { \ return _ret; \ } \ @@ -244,7 +232,7 @@ static void kvm_riscv_update_cpu_misa_ext(RISCVCPU *cpu, CPUState *cs) /* If we're here we're going to disable the MISA bit */ reg = 0; - id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_ISA_EXT, + id = KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_ISA_EXT, misa_cfg->kvm_reg_id); ret = kvm_set_one_reg(cs, id, ®); if (ret != 0) { @@ -430,7 +418,6 @@ static KVMCPUConfig kvm_sbi_dbcn = { static void kvm_riscv_update_cpu_cfg_isa_ext(RISCVCPU *cpu, CPUState *cs) { - CPURISCVState *env = &cpu->env; uint64_t id, reg; int i, ret; @@ -441,7 +428,7 @@ static void kvm_riscv_update_cpu_cfg_isa_ext(RISCVCPU *cpu, CPUState *cs) continue; } - id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_ISA_EXT, + id = KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_ISA_EXT, multi_ext_cfg->kvm_reg_id); reg = kvm_cpu_cfg_get(cpu, multi_ext_cfg); ret = kvm_set_one_reg(cs, id, ®); @@ -566,14 +553,14 @@ static int kvm_riscv_get_regs_core(CPUState *cs) target_ulong reg; CPURISCVState *env = 
&RISCV_CPU(cs)->env; - ret = kvm_get_one_reg(cs, RISCV_CORE_REG(env, regs.pc), ®); + ret = kvm_get_one_reg(cs, RISCV_CORE_REG(regs.pc), ®); if (ret) { return ret; } env->pc = reg; for (i = 1; i < 32; i++) { - uint64_t id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CORE, i); + uint64_t id = KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_CORE, i); ret = kvm_get_one_reg(cs, id, ®); if (ret) { return ret; @@ -592,13 +579,13 @@ static int kvm_riscv_put_regs_core(CPUState *cs) CPURISCVState *env = &RISCV_CPU(cs)->env; reg = env->pc; - ret = kvm_set_one_reg(cs, RISCV_CORE_REG(env, regs.pc), ®); + ret = kvm_set_one_reg(cs, RISCV_CORE_REG(regs.pc), ®); if (ret) { return ret; } for (i = 1; i < 32; i++) { - uint64_t id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CORE, i); + uint64_t id = KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_CORE, i); reg = env->gpr[i]; ret = kvm_set_one_reg(cs, id, ®); if (ret) { @@ -796,26 +783,26 @@ static int kvm_riscv_get_regs_vector(CPUState *cs) return 0; } - ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vstart), ®); + ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(vstart), ®); if (ret) { return ret; } env->vstart = reg; - ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vl), ®); + ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(vl), ®); if (ret) { return ret; } env->vl = reg; - ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vtype), ®); + ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(vtype), ®); if (ret) { return ret; } env->vtype = reg; if (kvm_v_vlenb.supported) { - ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vlenb), ®); + ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(vlenb), ®); if (ret) { return ret; } @@ -853,26 +840,26 @@ static int kvm_riscv_put_regs_vector(CPUState *cs) } reg = env->vstart; - ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vstart), ®); + ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(vstart), ®); if (ret) { return ret; } reg = env->vl; - ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vl), ®); + ret = 
kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(vl), ®); if (ret) { return ret; } reg = env->vtype; - ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vtype), ®); + ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(vtype), ®); if (ret) { return ret; } if (kvm_v_vlenb.supported) { reg = cpu->cfg.vlenb; - ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vlenb), ®); + ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(vlenb), ®); for (int i = 0; i < 32; i++) { /* @@ -951,25 +938,24 @@ static void kvm_riscv_destroy_scratch_vcpu(KVMScratchCPU *scratch) static void kvm_riscv_init_machine_ids(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) { - CPURISCVState *env = &cpu->env; struct kvm_one_reg reg; int ret; - reg.id = RISCV_CONFIG_REG(env, mvendorid); + reg.id = RISCV_CONFIG_REG(mvendorid); reg.addr = (uint64_t)&cpu->cfg.mvendorid; ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, ®); if (ret != 0) { error_report("Unable to retrieve mvendorid from host, error %d", ret); } - reg.id = RISCV_CONFIG_REG(env, marchid); + reg.id = RISCV_CONFIG_REG(marchid); reg.addr = (uint64_t)&cpu->cfg.marchid; ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, ®); if (ret != 0) { error_report("Unable to retrieve marchid from host, error %d", ret); } - reg.id = RISCV_CONFIG_REG(env, mimpid); + reg.id = RISCV_CONFIG_REG(mimpid); reg.addr = (uint64_t)&cpu->cfg.mimpid; ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, ®); if (ret != 0) { @@ -984,7 +970,7 @@ static void kvm_riscv_init_misa_ext_mask(RISCVCPU *cpu, struct kvm_one_reg reg; int ret; - reg.id = RISCV_CONFIG_REG(env, isa); + reg.id = RISCV_CONFIG_REG(isa); reg.addr = (uint64_t)&env->misa_ext_mask; ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, ®); @@ -1001,11 +987,10 @@ static void kvm_riscv_init_misa_ext_mask(RISCVCPU *cpu, static void kvm_riscv_read_cbomz_blksize(RISCVCPU *cpu, KVMScratchCPU *kvmcpu, KVMCPUConfig *cbomz_cfg) { - CPURISCVState *env = &cpu->env; struct kvm_one_reg reg; int ret; - reg.id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CONFIG, + reg.id = 
KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_CONFIG, cbomz_cfg->kvm_reg_id); reg.addr = (uint64_t)kvmconfig_get_cfg_addr(cpu, cbomz_cfg); ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, ®); @@ -1019,7 +1004,6 @@ static void kvm_riscv_read_cbomz_blksize(RISCVCPU *cpu, KVMScratchCPU *kvmcpu, static void kvm_riscv_read_multiext_legacy(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) { - CPURISCVState *env = &cpu->env; uint64_t val; int i, ret; @@ -1027,7 +1011,7 @@ static void kvm_riscv_read_multiext_legacy(RISCVCPU *cpu, KVMCPUConfig *multi_ext_cfg = &kvm_multi_ext_cfgs[i]; struct kvm_one_reg reg; - reg.id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_ISA_EXT, + reg.id = KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_ISA_EXT, multi_ext_cfg->kvm_reg_id); reg.addr = (uint64_t)&val; ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, ®); @@ -1159,7 +1143,7 @@ static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) for (i = 0; i < ARRAY_SIZE(kvm_multi_ext_cfgs); i++) { multi_ext_cfg = &kvm_multi_ext_cfgs[i]; - reg_id = kvm_riscv_reg_id_ulong(&cpu->env, KVM_REG_RISCV_ISA_EXT, + reg_id = KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_ISA_EXT, multi_ext_cfg->kvm_reg_id); reg_search = bsearch(®_id, reglist->reg, reglist->n, sizeof(uint64_t), uint64_cmp); @@ -1338,12 +1322,11 @@ void kvm_arch_init_irq_routing(KVMState *s) static int kvm_vcpu_set_machine_ids(RISCVCPU *cpu, CPUState *cs) { - CPURISCVState *env = &cpu->env; target_ulong reg; uint64_t id; int ret; - id = RISCV_CONFIG_REG(env, mvendorid); + id = RISCV_CONFIG_REG(mvendorid); /* * cfg.mvendorid is an uint32 but a target_ulong will * be written. 
Assign it to a target_ulong var to avoid @@ -1355,13 +1338,13 @@ static int kvm_vcpu_set_machine_ids(RISCVCPU *cpu, CPUState *cs) return ret; } - id = RISCV_CONFIG_REG(env, marchid); + id = RISCV_CONFIG_REG(marchid); ret = kvm_set_one_reg(cs, id, &cpu->cfg.marchid); if (ret != 0) { return ret; } - id = RISCV_CONFIG_REG(env, mimpid); + id = RISCV_CONFIG_REG(mimpid); ret = kvm_set_one_reg(cs, id, &cpu->cfg.mimpid); return ret; @@ -1911,7 +1894,7 @@ void riscv_kvm_cpu_finalize_features(RISCVCPU *cpu, Error **errp) if (cpu->cfg.ext_zicbom && riscv_cpu_option_set(kvm_cbom_blocksize.name)) { - reg.id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CONFIG, + reg.id = KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_CONFIG, kvm_cbom_blocksize.kvm_reg_id); reg.addr = (uint64_t)&val; ret = ioctl(kvmcpu.cpufd, KVM_GET_ONE_REG, ®); @@ -1930,7 +1913,7 @@ void riscv_kvm_cpu_finalize_features(RISCVCPU *cpu, Error **errp) if (cpu->cfg.ext_zicboz && riscv_cpu_option_set(kvm_cboz_blocksize.name)) { - reg.id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CONFIG, + reg.id = KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_CONFIG, kvm_cboz_blocksize.kvm_reg_id); reg.addr = (uint64_t)&val; ret = ioctl(kvmcpu.cpufd, KVM_GET_ONE_REG, ®); From 6ba14ba513a4fef01f5e49e48230c1dffaa9115d Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Tue, 29 Apr 2025 09:44:17 -0300 Subject: [PATCH 048/136] target/riscv/kvm: add kvm_csr_cfgs[] At this moment we're not checking if the host has support for any specific CSR before doing get/put regs. This will cause problems if the host KVM doesn't support it (see [1] as an example). We'll use the same approach done with the CPU extensions: read all known KVM CSRs during init() to check for availability, then read/write them if they are present. This will be made by either using get-reglist or by directly reading the CSRs. 
For now we'll just convert the CSRs to use a kvm_csr_cfgs[] array, reusing the same KVMCPUConfig abstraction we use for extensions, and use the array in (get|put)_regs_csr() instead of manually listing them. A lot of boilerplate will be added but at least we'll automate the get/put procedure for CSRs, i.e. adding a new CSR in the future will be a matter of adding it to kvm_csr_cfgs[] and everything else will be taken care of. Despite all the code changes no behavioral change is made. [1] https://lore.kernel.org/qemu-riscv/CABJz62OfUDHYkQ0T3rGHStQprf1c7_E0qBLbLKhfv=+jb0SYAw@mail.gmail.com/ Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Acked-by: Alistair Francis Message-ID: <20250429124421.223883-6-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit d3b6f1742c36e3a3c1e74cb60646ee98a4e39ea3) Signed-off-by: Michael Tokarev --- target/riscv/cpu.h | 1 + target/riscv/kvm/kvm-cpu.c | 121 ++++++++++++++++++++++++++----------- 2 files changed, 86 insertions(+), 36 deletions(-) diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index 51e49e03de..7a56666f9a 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -79,6 +79,7 @@ const char *riscv_get_misa_ext_name(uint32_t bit); const char *riscv_get_misa_ext_description(uint32_t bit); #define CPU_CFG_OFFSET(_prop) offsetof(struct RISCVCPUConfig, _prop) +#define ENV_CSR_OFFSET(_csr) offsetof(CPURISCVState, _csr) typedef struct riscv_cpu_profile { struct riscv_cpu_profile *u_parent; diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index 1afc4b729e..751494a8ec 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -114,22 +114,6 @@ static uint64_t kvm_riscv_vector_reg_id(RISCVCPU *cpu, KVM_RISCV_REG_ID_ULONG(KVM_REG_RISCV_VECTOR, \ KVM_REG_RISCV_VECTOR_CSR_REG(name)) -#define KVM_RISCV_GET_CSR(cs, env, csr, reg) \ - do { \ - int _ret = kvm_get_one_reg(cs, RISCV_CSR_REG(csr), &reg); \ - if (_ret) { \ - return
_ret; \ - } \ - } while (0) - -#define KVM_RISCV_SET_CSR(cs, env, csr, reg) \ - do { \ - int _ret = kvm_set_one_reg(cs, RISCV_CSR_REG(csr), ®); \ - if (_ret) { \ - return _ret; \ - } \ - } while (0) - #define KVM_RISCV_GET_TIMER(cs, name, reg) \ do { \ int ret = kvm_get_one_reg(cs, RISCV_TIMER_REG(name), ®); \ @@ -251,6 +235,53 @@ static void kvm_riscv_update_cpu_misa_ext(RISCVCPU *cpu, CPUState *cs) } } +#define KVM_CSR_CFG(_name, _env_prop, reg_id) \ + {.name = _name, .offset = ENV_CSR_OFFSET(_env_prop), \ + .kvm_reg_id = reg_id} + +static KVMCPUConfig kvm_csr_cfgs[] = { + KVM_CSR_CFG("sstatus", mstatus, RISCV_CSR_REG(sstatus)), + KVM_CSR_CFG("sie", mie, RISCV_CSR_REG(sie)), + KVM_CSR_CFG("stvec", stvec, RISCV_CSR_REG(stvec)), + KVM_CSR_CFG("sscratch", sscratch, RISCV_CSR_REG(sscratch)), + KVM_CSR_CFG("sepc", sepc, RISCV_CSR_REG(sepc)), + KVM_CSR_CFG("scause", scause, RISCV_CSR_REG(scause)), + KVM_CSR_CFG("stval", stval, RISCV_CSR_REG(stval)), + KVM_CSR_CFG("sip", mip, RISCV_CSR_REG(sip)), + KVM_CSR_CFG("satp", satp, RISCV_CSR_REG(satp)), +}; + +static void *kvmconfig_get_env_addr(RISCVCPU *cpu, KVMCPUConfig *csr_cfg) +{ + return (void *)&cpu->env + csr_cfg->offset; +} + +static uint32_t kvm_cpu_csr_get_u32(RISCVCPU *cpu, KVMCPUConfig *csr_cfg) +{ + uint32_t *val32 = kvmconfig_get_env_addr(cpu, csr_cfg); + return *val32; +} + +static uint64_t kvm_cpu_csr_get_u64(RISCVCPU *cpu, KVMCPUConfig *csr_cfg) +{ + uint64_t *val64 = kvmconfig_get_env_addr(cpu, csr_cfg); + return *val64; +} + +static void kvm_cpu_csr_set_u32(RISCVCPU *cpu, KVMCPUConfig *csr_cfg, + uint32_t val) +{ + uint32_t *val32 = kvmconfig_get_env_addr(cpu, csr_cfg); + *val32 = val; +} + +static void kvm_cpu_csr_set_u64(RISCVCPU *cpu, KVMCPUConfig *csr_cfg, + uint64_t val) +{ + uint64_t *val64 = kvmconfig_get_env_addr(cpu, csr_cfg); + *val64 = val; +} + #define KVM_EXT_CFG(_name, _prop, _reg_id) \ {.name = _name, .offset = CPU_CFG_OFFSET(_prop), \ .kvm_reg_id = _reg_id} @@ -598,34 +629,52 @@ static int 
kvm_riscv_put_regs_core(CPUState *cs) static int kvm_riscv_get_regs_csr(CPUState *cs) { - CPURISCVState *env = &RISCV_CPU(cs)->env; + RISCVCPU *cpu = RISCV_CPU(cs); + uint64_t reg; + int i, ret; + + for (i = 0; i < ARRAY_SIZE(kvm_csr_cfgs); i++) { + KVMCPUConfig *csr_cfg = &kvm_csr_cfgs[i]; - KVM_RISCV_GET_CSR(cs, env, sstatus, env->mstatus); - KVM_RISCV_GET_CSR(cs, env, sie, env->mie); - KVM_RISCV_GET_CSR(cs, env, stvec, env->stvec); - KVM_RISCV_GET_CSR(cs, env, sscratch, env->sscratch); - KVM_RISCV_GET_CSR(cs, env, sepc, env->sepc); - KVM_RISCV_GET_CSR(cs, env, scause, env->scause); - KVM_RISCV_GET_CSR(cs, env, stval, env->stval); - KVM_RISCV_GET_CSR(cs, env, sip, env->mip); - KVM_RISCV_GET_CSR(cs, env, satp, env->satp); + ret = kvm_get_one_reg(cs, csr_cfg->kvm_reg_id, ®); + if (ret) { + return ret; + } + + if (KVM_REG_SIZE(csr_cfg->kvm_reg_id) == sizeof(uint32_t)) { + kvm_cpu_csr_set_u32(cpu, csr_cfg, reg); + } else if (KVM_REG_SIZE(csr_cfg->kvm_reg_id) == sizeof(uint64_t)) { + kvm_cpu_csr_set_u64(cpu, csr_cfg, reg); + } else { + g_assert_not_reached(); + } + } return 0; } static int kvm_riscv_put_regs_csr(CPUState *cs) { - CPURISCVState *env = &RISCV_CPU(cs)->env; + RISCVCPU *cpu = RISCV_CPU(cs); + uint64_t reg; + int i, ret; + + for (i = 0; i < ARRAY_SIZE(kvm_csr_cfgs); i++) { + KVMCPUConfig *csr_cfg = &kvm_csr_cfgs[i]; + + if (KVM_REG_SIZE(csr_cfg->kvm_reg_id) == sizeof(uint32_t)) { + reg = kvm_cpu_csr_get_u32(cpu, csr_cfg); + } else if (KVM_REG_SIZE(csr_cfg->kvm_reg_id) == sizeof(uint64_t)) { + reg = kvm_cpu_csr_get_u64(cpu, csr_cfg); + } else { + g_assert_not_reached(); + } - KVM_RISCV_SET_CSR(cs, env, sstatus, env->mstatus); - KVM_RISCV_SET_CSR(cs, env, sie, env->mie); - KVM_RISCV_SET_CSR(cs, env, stvec, env->stvec); - KVM_RISCV_SET_CSR(cs, env, sscratch, env->sscratch); - KVM_RISCV_SET_CSR(cs, env, sepc, env->sepc); - KVM_RISCV_SET_CSR(cs, env, scause, env->scause); - KVM_RISCV_SET_CSR(cs, env, stval, env->stval); - KVM_RISCV_SET_CSR(cs, env, sip, 
env->mip); - KVM_RISCV_SET_CSR(cs, env, satp, env->satp); + ret = kvm_set_one_reg(cs, csr_cfg->kvm_reg_id, &reg); + if (ret) { + return ret; + } + } return 0; } From 9ac56fb385e94c6382928d604034861e189cfb0c Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Tue, 29 Apr 2025 09:44:18 -0300 Subject: [PATCH 049/136] target/riscv/kvm: do not read unavailable CSRs [1] reports that commit 4db19d5b21 broke a KVM guest running kernel 6.6. This happens because the kernel does not know 'senvcfg', making it unable to boot because QEMU is reading/writing it without any checks. After converting the CSRs to do "automated" get/put reg procedures in the previous patch we can now scan for availability. Two functions are created: - kvm_riscv_read_csr_cfg_legacy() will check if the CSR exists by brute forcing KVM_GET_ONE_REG on each one of them, interpreting an EINVAL return as an indication that the CSR isn't available. This will be used in the absence of KVM_GET_REG_LIST; - kvm_riscv_read_csr_cfg() will use the existing result of get_reg_list to check if the CSR ids are present. kvm_riscv_init_multiext_cfg() is now kvm_riscv_init_cfg() to reflect that the function is also dealing with CSRs.
[1] https://lore.kernel.org/qemu-riscv/CABJz62OfUDHYkQ0T3rGHStQprf1c7_E0qBLbLKhfv=+jb0SYAw@mail.gmail.com/ Fixes: 4db19d5b21 ("target/riscv/kvm: add missing KVM CSRs") Reported-by: Andrea Bolognani Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Acked-by: Alistair Francis Message-ID: <20250429124421.223883-7-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis Cc: qemu-stable@nongnu.org (cherry picked from commit f396c217a53d9b7960dd002fbb07cfe1d46b27aa) Signed-off-by: Michael Tokarev --- target/riscv/kvm/kvm-cpu.c | 62 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 3 deletions(-) diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index 751494a8ec..8001ca153e 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -636,6 +636,10 @@ static int kvm_riscv_get_regs_csr(CPUState *cs) for (i = 0; i < ARRAY_SIZE(kvm_csr_cfgs); i++) { KVMCPUConfig *csr_cfg = &kvm_csr_cfgs[i]; + if (!csr_cfg->supported) { + continue; + } + ret = kvm_get_one_reg(cs, csr_cfg->kvm_reg_id, &reg); if (ret) { return ret; @@ -662,6 +666,10 @@ static int kvm_riscv_put_regs_csr(CPUState *cs) for (i = 0; i < ARRAY_SIZE(kvm_csr_cfgs); i++) { KVMCPUConfig *csr_cfg = &kvm_csr_cfgs[i]; + if (!csr_cfg->supported) { + continue; + } + if (KVM_REG_SIZE(csr_cfg->kvm_reg_id) == sizeof(uint32_t)) { reg = kvm_cpu_csr_get_u32(cpu, csr_cfg); } else if (KVM_REG_SIZE(csr_cfg->kvm_reg_id) == sizeof(uint64_t)) { @@ -1090,6 +1098,32 @@ static void kvm_riscv_read_multiext_legacy(RISCVCPU *cpu, } } +static void kvm_riscv_read_csr_cfg_legacy(KVMScratchCPU *kvmcpu) +{ + uint64_t val; + int i, ret; + + for (i = 0; i < ARRAY_SIZE(kvm_csr_cfgs); i++) { + KVMCPUConfig *csr_cfg = &kvm_csr_cfgs[i]; + struct kvm_one_reg reg; + + reg.id = csr_cfg->kvm_reg_id; + reg.addr = (uint64_t)&val; + ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg); + if (ret != 0) { + if (errno == EINVAL) { + csr_cfg->supported = false; + } else { + error_report("Unable to 
read KVM CSR %s: %s", + csr_cfg->name, strerror(errno)); + exit(EXIT_FAILURE); + } + } else { + csr_cfg->supported = true; + } + } +} + static int uint64_cmp(const void *a, const void *b) { uint64_t val1 = *(const uint64_t *)a; @@ -1146,7 +1180,26 @@ static void kvm_riscv_read_vlenb(RISCVCPU *cpu, KVMScratchCPU *kvmcpu, } } -static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) +static void kvm_riscv_read_csr_cfg(struct kvm_reg_list *reglist) +{ + struct kvm_reg_list *reg_search; + uint64_t reg_id; + + for (int i = 0; i < ARRAY_SIZE(kvm_csr_cfgs); i++) { + KVMCPUConfig *csr_cfg = &kvm_csr_cfgs[i]; + + reg_id = csr_cfg->kvm_reg_id; + reg_search = bsearch(&reg_id, reglist->reg, reglist->n, + sizeof(uint64_t), uint64_cmp); + if (!reg_search) { + continue; + } + + csr_cfg->supported = true; + } +} + +static void kvm_riscv_init_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) { g_autofree struct kvm_reg_list *reglist = NULL; KVMCPUConfig *multi_ext_cfg; @@ -1163,7 +1216,9 @@ static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) * (EINVAL). Use read_legacy() in this case. 
*/ if (errno == EINVAL) { - return kvm_riscv_read_multiext_legacy(cpu, kvmcpu); + kvm_riscv_read_multiext_legacy(cpu, kvmcpu); + kvm_riscv_read_csr_cfg_legacy(kvmcpu); + return; } else if (errno != E2BIG) { /* * E2BIG is an expected error message for the API since we @@ -1226,6 +1281,7 @@ static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu) } kvm_riscv_check_sbi_dbcn_support(cpu, reglist); + kvm_riscv_read_csr_cfg(reglist); } static void riscv_init_kvm_registers(Object *cpu_obj) @@ -1239,7 +1295,7 @@ static void riscv_init_kvm_registers(Object *cpu_obj) kvm_riscv_init_machine_ids(cpu, &kvmcpu); kvm_riscv_init_misa_ext_mask(cpu, &kvmcpu); - kvm_riscv_init_multiext_cfg(cpu, &kvmcpu); + kvm_riscv_init_cfg(cpu, &kvmcpu); kvm_riscv_destroy_scratch_vcpu(&kvmcpu); } From 2f6357ca962568d2127ee97630997c31543bf0c0 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Tue, 13 May 2025 23:16:51 -0400 Subject: [PATCH 050/136] i386/tcg: Make CPUID_HT and CPUID_EXT3_CMP_LEG supported Since commit c6bd2dd63420 ("i386/cpu: Set up CPUID_HT in x86_cpu_expand_features() instead of cpu_x86_cpuid()") and commit 99a637a86f55 ("i386/cpu: Set and track CPUID_EXT3_CMP_LEG in env->features[FEAT_8000_0001_ECX]"), it gets warnings when booting the VM with vcpus >= 2 and with tcg: qemu-system-x86_64: warning: TCG doesn't support requested feature: CPUID.01H:EDX.ht [bit 28] qemu-system-x86_64: warning: TCG doesn't support requested feature: CPUID.80000001H:ECX.cmp-legacy [bit 1] This is because, after the two commits, CPUID_HT and CPUID_EXT3_CMP_LEG are set in env->features[] when vcpus >=2 (in x86_cpu_expand_features()) later in x86_cpu_filter_features() it will check against the TCG supported bits. However, current TCG doesn't mark the two bits as supported, hence the warnings. Fix it by adding the two bits to the supported bits of TCG since multiple vcpus are supported by TCG. 
Fixes: c6bd2dd63420 ("i386/cpu: Set up CPUID_HT in x86_cpu_expand_features() instead of cpu_x86_cpuid()") Fixes: 99a637a86f55 ("i386/cpu: Set and track CPUID_EXT3_CMP_LEG in env->features[FEAT_8000_0001_ECX]") Reported-by: Ewan Hai Signed-off-by: Xiaoyao Li Link: https://lore.kernel.org/r/20250514031652.838763-2-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini (cherry picked from commit 5979f50fa9fdbb3fb49e2b498f84faa7503c8ed1) Signed-off-by: Michael Tokarev --- target/i386/cpu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 3fb1ec62da..5e12cba1b8 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -774,11 +774,12 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | CPUID_SEP | \ CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | CPUID_PAT | \ CPUID_PSE36 | CPUID_CLFLUSH | CPUID_ACPI | CPUID_MMX | \ - CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_DE) + CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_DE | \ + CPUID_HT) /* partly implemented: CPUID_MTRR, CPUID_MCA, CPUID_CLFLUSH (needed for Win64) */ /* missing: - CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_HT, CPUID_TM, CPUID_PBE */ + CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_TM, CPUID_PBE */ /* * Kernel-only features that can be shown to usermode programs even if @@ -846,7 +847,8 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \ CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A | \ - CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_KERNEL_FEATURES) + CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_KERNEL_FEATURES | \ + CPUID_EXT3_CMP_LEG) #define TCG_EXT4_FEATURES 0 From 3e59fccb03eee912867b2d58dc885d48f44cec78 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Tue, 13 May 2025 23:16:52 -0400 Subject: [PATCH 051/136] i386/hvf: Make CPUID_HT supported Since Commit c6bd2dd63420 ("i386/cpu: Set up CPUID_HT in 
x86_cpu_expand_features() instead of cpu_x86_cpuid()"), CPUID_HT will be set in env->features[] in x86_cpu_expand_features() when vcpus >= 2. Later in x86_cpu_filter_features() it will check against the HVF supported bits. It will trigger the warning like qemu-system-x86_64: warning: host doesn't support requested feature: CPUID.01H:EDX.ht [bit 28] Add CPUID_HT to HVF supported CPUID bits to fix it. Signed-off-by: Xiaoyao Li Link: https://lore.kernel.org/r/20250514031652.838763-3-xiaoyao.li@intel.com Signed-off-by: Paolo Bonzini (cherry picked from commit 7a48612306768833f8cc87418a5a53e712f26ac1) Signed-off-by: Michael Tokarev --- target/i386/hvf/x86_cpuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/i386/hvf/x86_cpuid.c b/target/i386/hvf/x86_cpuid.c index ae836f65cc..c59cc25f3d 100644 --- a/target/i386/hvf/x86_cpuid.c +++ b/target/i386/hvf/x86_cpuid.c @@ -73,7 +73,7 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | - CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS; + CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_HT; ecx &= CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 | CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_MOVBE | From cdf3fb061719f32d46ee620fadea37b8e55771d0 Mon Sep 17 00:00:00 2001 From: Rakesh Jeyasingh Date: Tue, 29 Apr 2025 22:33:53 +0530 Subject: [PATCH 052/136] hw/pci-host/gt64120: Fix endianness handling The GT-64120 PCI controller requires special handling where: 1. Host bridge(bus 0 ,device 0) must never be byte-swapped 2. Other devices follow MByteSwap bit in GT_PCI0_CMD The previous implementation incorrectly swapped all accesses, breaking host bridge detection (lspci -d 11ab:4620). Changes made: 1. 
Removed gt64120_update_pci_cfgdata_mapping() and moved data_mem initialization to gt64120_realize() for cleaner setup 2. Implemented custom read/write handlers that: - Preserve host bridge accesses (extract32(config_reg,11,13)==0) - apply swapping only for non-bridge devices in big-endian mode Fixes: 145e2198 ("hw/mips/gt64xxx_pci: Endian-swap using PCI_HOST_BRIDGE MemoryRegionOps") Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2826 Signed-off-by: Rakesh Jeyasingh Tested-by: Thomas Huth Link: https://lore.kernel.org/r/20250429170354.150581-2-rakeshjb010@gmail.com Signed-off-by: Paolo Bonzini (cherry picked from commit e5894fd6f411c113e2b5f62811e96eeb5b084381) Signed-off-by: Michael Tokarev --- hw/pci-host/gt64120.c | 82 +++++++++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 34 deletions(-) diff --git a/hw/pci-host/gt64120.c b/hw/pci-host/gt64120.c index d5c13a89b6..7ad44cf2da 100644 --- a/hw/pci-host/gt64120.c +++ b/hw/pci-host/gt64120.c @@ -320,38 +320,6 @@ static void gt64120_isd_mapping(GT64120State *s) memory_region_transaction_commit(); } -static void gt64120_update_pci_cfgdata_mapping(GT64120State *s) -{ - /* Indexed on MByteSwap bit, see Table 158: PCI_0 Command, Offset: 0xc00 */ - static const MemoryRegionOps *pci_host_data_ops[] = { - &pci_host_data_be_ops, &pci_host_data_le_ops - }; - PCIHostState *phb = PCI_HOST_BRIDGE(s); - - memory_region_transaction_begin(); - - /* - * The setting of the MByteSwap bit and MWordSwap bit in the PCI Internal - * Command Register determines how data transactions from the CPU to/from - * PCI are handled along with the setting of the Endianness bit in the CPU - * Configuration Register. 
See: - * - Table 16: 32-bit PCI Transaction Endianness - * - Table 158: PCI_0 Command, Offset: 0xc00 - */ - - if (memory_region_is_mapped(&phb->data_mem)) { - memory_region_del_subregion(&s->ISD_mem, &phb->data_mem); - object_unparent(OBJECT(&phb->data_mem)); - } - memory_region_init_io(&phb->data_mem, OBJECT(phb), - pci_host_data_ops[s->regs[GT_PCI0_CMD] & 1], - s, "pci-conf-data", 4); - memory_region_add_subregion_overlap(&s->ISD_mem, GT_PCI0_CFGDATA << 2, - &phb->data_mem, 1); - - memory_region_transaction_commit(); -} - static void gt64120_pci_mapping(GT64120State *s) { memory_region_transaction_begin(); @@ -645,7 +613,6 @@ static void gt64120_writel(void *opaque, hwaddr addr, case GT_PCI0_CMD: case GT_PCI1_CMD: s->regs[saddr] = val & 0x0401fc0f; - gt64120_update_pci_cfgdata_mapping(s); break; case GT_PCI0_TOR: case GT_PCI0_BS_SCS10: @@ -1024,6 +991,48 @@ static const MemoryRegionOps isd_mem_ops = { }, }; +static bool bswap(const GT64120State *s) +{ + PCIHostState *phb = PCI_HOST_BRIDGE(s); + /*check for bus == 0 && device == 0, Bits 11:15 = Device , Bits 16:23 = Bus*/ + bool is_phb_dev0 = extract32(phb->config_reg, 11, 13) == 0; + bool le_mode = FIELD_EX32(s->regs[GT_PCI0_CMD], GT_PCI0_CMD, MByteSwap); + /* Only swap for non-bridge devices in big-endian mode */ + return !le_mode && !is_phb_dev0; +} + +static uint64_t gt64120_pci_data_read(void *opaque, hwaddr addr, unsigned size) +{ + GT64120State *s = opaque; + uint32_t val = pci_host_data_le_ops.read(opaque, addr, size); + + if (bswap(s)) { + val = bswap32(val); + } + return val; +} + +static void gt64120_pci_data_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + GT64120State *s = opaque; + + if (bswap(s)) { + val = bswap32(val); + } + pci_host_data_le_ops.write(opaque, addr, val, size); +} + +static const MemoryRegionOps gt64120_pci_data_ops = { + .read = gt64120_pci_data_read, + .write = gt64120_pci_data_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + 
.max_access_size = 4, + }, +}; + static void gt64120_reset(DeviceState *dev) { GT64120State *s = GT64120_PCI_HOST_BRIDGE(dev); @@ -1178,7 +1187,6 @@ static void gt64120_reset(DeviceState *dev) gt64120_isd_mapping(s); gt64120_pci_mapping(s); - gt64120_update_pci_cfgdata_mapping(s); } static void gt64120_realize(DeviceState *dev, Error **errp) @@ -1202,6 +1210,12 @@ static void gt64120_realize(DeviceState *dev, Error **errp) memory_region_add_subregion_overlap(&s->ISD_mem, GT_PCI0_CFGADDR << 2, &phb->conf_mem, 1); + memory_region_init_io(&phb->data_mem, OBJECT(phb), + &gt64120_pci_data_ops, + s, "pci-conf-data", 4); + memory_region_add_subregion_overlap(&s->ISD_mem, GT_PCI0_CFGDATA << 2, + &phb->data_mem, 1); + /* * The whole address space decoded by the GT-64120A doesn't generate From ef1655962fe022b93e3f9d0aa4741e0e9e066a78 Mon Sep 17 00:00:00 2001 From: Rakesh Jeyasingh Date: Tue, 29 Apr 2025 22:33:54 +0530 Subject: [PATCH 053/136] hw/pci-host: Remove unused pci_host_data_be_ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pci_host_data_be_ops became unused after endianness fixes Suggested-by: Paolo Bonzini Signed-off-by: Rakesh Jeyasingh Reviewed-by: Philippe Mathieu-Daudé Tested-by: Thomas Huth Link: https://lore.kernel.org/r/20250429170354.150581-3-rakeshjb010@gmail.com Signed-off-by: Paolo Bonzini (cherry picked from commit 560375cff3ccedabf1fe5ca1bc7a31b13fdc68e5) Signed-off-by: Michael Tokarev --- hw/pci/pci_host.c | 6 ------ include/hw/pci-host/dino.h | 4 ---- include/hw/pci/pci_host.h | 1 - 3 files changed, 11 deletions(-) diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index 80f91f409f..56f7f28a1a 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -217,12 +217,6 @@ const MemoryRegionOps pci_host_data_le_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; -const MemoryRegionOps pci_host_data_be_ops = { - .read = pci_host_data_read, - .write = pci_host_data_write, - .endianness = DEVICE_BIG_ENDIAN, -}; - 
static bool pci_host_needed(void *opaque) { PCIHostState *s = opaque; diff --git a/include/hw/pci-host/dino.h b/include/hw/pci-host/dino.h index fd7975c798..5dc8cdf610 100644 --- a/include/hw/pci-host/dino.h +++ b/include/hw/pci-host/dino.h @@ -109,10 +109,6 @@ static const uint32_t reg800_keep_bits[DINO800_REGS] = { struct DinoState { PCIHostState parent_obj; - /* - * PCI_CONFIG_ADDR is parent_obj.config_reg, via pci_host_conf_be_ops, - * so that we can map PCI_CONFIG_DATA to pci_host_data_be_ops. - */ uint32_t config_reg_dino; /* keep original copy, including 2 lowest bits */ uint32_t iar0; diff --git a/include/hw/pci/pci_host.h b/include/hw/pci/pci_host.h index e52d8ec2cd..954dd446fa 100644 --- a/include/hw/pci/pci_host.h +++ b/include/hw/pci/pci_host.h @@ -68,6 +68,5 @@ uint32_t pci_data_read(PCIBus *s, uint32_t addr, unsigned len); extern const MemoryRegionOps pci_host_conf_le_ops; extern const MemoryRegionOps pci_host_conf_be_ops; extern const MemoryRegionOps pci_host_data_le_ops; -extern const MemoryRegionOps pci_host_data_be_ops; #endif /* PCI_HOST_H */ From 5e8ef1da897e8e7b30f4e0102f2832a9a73518af Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Tue, 13 May 2025 22:31:31 +0800 Subject: [PATCH 054/136] qapi/misc-target: Fix the doc to distinguish query-sgx and query-sgx-capabilities There're 2 QMP commands: query-sgx and query-sgx-capabilities, but their outputs are very similar and the documentation lacks clear differentiation. From the code, query-sgx is used to gather guest's SGX capabilities (including SGX related CPUIDs and EPC sections' size, in SGXInfo), and if guest doesn't have SGX, then QEMU will report the error message. On the other hand, query-sgx-capabilities is used to gather host's SGX capabilities (described by SGXInfo as well). And if host doesn't support SGX, then QEMU will also report the error message. 
Considering that SGXInfo is already documented and both these 2 commands have enough error messages (for the exception case in their codes). Therefore the QAPI documentation for these two commands only needs to emphasize that one of them applies to the guest and the other to the host. Fix their documentation to reflect this difference. Reported-by: Markus Armbruster Suggested-by: Paolo Bonzini Signed-off-by: Zhao Liu Acked-by: Markus Armbruster Link: https://lore.kernel.org/r/20250513143131.2008078-3-zhao1.liu@intel.com Signed-off-by: Paolo Bonzini (cherry picked from commit 7f2131c35c1781ca41c62dc26fd93282e1351323) Signed-off-by: Michael Tokarev --- qapi/misc-target.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qapi/misc-target.json b/qapi/misc-target.json index 8d70bd24d8..827515c363 100644 --- a/qapi/misc-target.json +++ b/qapi/misc-target.json @@ -380,7 +380,7 @@ ## # @query-sgx: # -# Returns information about SGX +# Returns information about configured SGX capabilities of guest # # Returns: @SGXInfo # @@ -399,7 +399,7 @@ ## # @query-sgx-capabilities: # -# Returns information from host SGX capabilities +# Returns information about SGX capabilities of host # # Returns: @SGXInfo # From 7484d61bdbc88923969d8d0e2e26e8ba0935f53a Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Fri, 16 May 2025 15:53:03 +0200 Subject: [PATCH 055/136] migration/multifd: Don't send device state packets with zerocopy flag If zerocopy is enabled for multifd then QIO_CHANNEL_WRITE_FLAG_ZERO_COPY flag is forced into all multifd channel write calls via p->write_flags that was setup in multifd_nocomp_send_setup(). However, device state packets aren't compatible with zerocopy - the data buffer isn't getting kept pinned until multifd channel flush. Make sure to mask that QIO_CHANNEL_WRITE_FLAG_ZERO_COPY flag in a multifd send thread if the data being sent is device state. 
Fixes: 0525b91a0b99 ("migration/multifd: Device state transfer support - send side") Signed-off-by: Maciej S. Szmigiero Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/3bd5f48578e29f3a78f41b1e4fbea3d4b2d9b136.1747403393.git.maciej.szmigiero@oracle.com Signed-off-by: Peter Xu (cherry picked from commit 6be7696129b302830a9cff7e30484e08c2d64b57) Signed-off-by: Michael Tokarev --- migration/multifd.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/migration/multifd.c b/migration/multifd.c index dfb5189f0e..198763bada 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -695,6 +695,7 @@ static void *multifd_send_thread(void *opaque) if (qatomic_load_acquire(&p->pending_job)) { bool is_device_state = multifd_payload_device_state(p->data); size_t total_size; + int write_flags_masked = 0; p->flags = 0; p->iovs_num = 0; @@ -702,6 +703,9 @@ static void *multifd_send_thread(void *opaque) if (is_device_state) { multifd_device_state_send_prepare(p); + + /* Device state packets cannot be sent via zerocopy */ + write_flags_masked |= QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; } else { ret = multifd_send_state->ops->send_prepare(p, &local_err); if (ret != 0) { @@ -723,7 +727,8 @@ static void *multifd_send_thread(void *opaque) &p->data->u.ram, &local_err); } else { ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, - NULL, 0, p->write_flags, + NULL, 0, + p->write_flags & ~write_flags_masked, &local_err); } From 8a48520c1760ed6a0075bc1e3f5e0fb66d35f91c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 13 May 2025 17:33:16 -0400 Subject: [PATCH 056/136] migration: Allow caps to be set when preempt or multifd cap enabled With commit 82137e6c8c ("migration: enforce multifd and postcopy preempt to be set before incoming"), and if postcopy preempt / multifd is enabled, one cannot setup any capability because these checks would always fail. 
(qemu) migrate_set_capability xbzrle off Error: Postcopy preempt must be set before incoming starts To fix it, check existing cap and only raise an error if the specific cap changed. Fixes: 82137e6c8c ("migration: enforce multifd and postcopy preempt to be set before incoming") Reviewed-by: Dr. David Alan Gilbert Reviewed-by: Juraj Marcin Signed-off-by: Peter Xu (cherry picked from commit 17bec9235bb0775cf8dec4103c167757ee8898f3) Signed-off-by: Michael Tokarev --- migration/options.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/migration/options.c b/migration/options.c index b0ac2ea408..5dc290c292 100644 --- a/migration/options.c +++ b/migration/options.c @@ -555,7 +555,7 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) return false; } - if (migrate_incoming_started()) { + if (!migrate_postcopy_preempt() && migrate_incoming_started()) { error_setg(errp, "Postcopy preempt must be set before incoming starts"); return false; @@ -563,7 +563,7 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) } if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { - if (migrate_incoming_started()) { + if (!migrate_multifd() && migrate_incoming_started()) { error_setg(errp, "Multifd must be set before incoming starts"); return false; } From 679450c92650294c77e80104760e93c7503bea78 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 17 May 2025 13:12:07 +0200 Subject: [PATCH 057/136] target/hppa: Copy instruction code into fr1 on FPU assist fault The hardware stores the instruction code in the lower bits of the FP exception register #1 on FP assist traps. This fixes the FP exception handler on Linux, as the Linux kernel uses the value to decide on the correct signal which should be pushed into userspace (see decode_fpu() in Linux kernel). 
Signed-off-by: Helge Deller (cherry picked from commit 923976dfe367b0bfed45ff660c369f3fe65604a7) Signed-off-by: Michael Tokarev --- target/hppa/int_helper.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/target/hppa/int_helper.c b/target/hppa/int_helper.c index 7d48643bb6..191ae19404 100644 --- a/target/hppa/int_helper.c +++ b/target/hppa/int_helper.c @@ -177,6 +177,10 @@ void hppa_cpu_do_interrupt(CPUState *cs) } } env->cr[CR_IIR] = ldl_phys(cs->as, paddr); + if (i == EXCP_ASSIST) { + /* stuff insn code into bits of FP exception register #1 */ + env->fr[0] |= (env->cr[CR_IIR] & 0x03ffffff); + } } break; From 5f119c69ff8867ecea92f78cbc7e288efb3a23a9 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 17 May 2025 13:20:17 +0200 Subject: [PATCH 058/136] linux-user/hppa: Send proper si_code on SIGFPE exception Improve the linux-user emulation to send the correct si_code depending on overflow (TARGET_FPE_FLTOVF), underflow (TARGET_FPE_FLTUND), ... Note that the hardware stores the relevant flags in FP exception register #1, which is actually the lower 32-bits of the 64-bit fr[0] register in qemu. 
Signed-off-by: Helge Deller (cherry picked from commit b4b49cf39dba5f993ad925f204cb820aacfc8e45) Signed-off-by: Michael Tokarev --- linux-user/hppa/cpu_loop.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/linux-user/hppa/cpu_loop.c b/linux-user/hppa/cpu_loop.c index 890e758cd1..9abaad5ef8 100644 --- a/linux-user/hppa/cpu_loop.c +++ b/linux-user/hppa/cpu_loop.c @@ -112,7 +112,7 @@ static abi_ulong hppa_lws(CPUHPPAState *env) void cpu_loop(CPUHPPAState *env) { CPUState *cs = env_cpu(env); - abi_ulong ret; + abi_ulong ret, si_code = 0; int trapnr; while (1) { @@ -169,7 +169,15 @@ void cpu_loop(CPUHPPAState *env) force_sig_fault(TARGET_SIGFPE, TARGET_FPE_CONDTRAP, env->iaoq_f); break; case EXCP_ASSIST: - force_sig_fault(TARGET_SIGFPE, 0, env->iaoq_f); + #define set_si_code(mask, val) \ + if (env->fr[0] & mask) { si_code = val; } + set_si_code(R_FPSR_FLG_I_MASK, TARGET_FPE_FLTRES); + set_si_code(R_FPSR_FLG_U_MASK, TARGET_FPE_FLTUND); + set_si_code(R_FPSR_FLG_O_MASK, TARGET_FPE_FLTOVF); + set_si_code(R_FPSR_FLG_Z_MASK, TARGET_FPE_FLTDIV); + set_si_code(R_FPSR_FLG_V_MASK, TARGET_FPE_FLTINV); + #undef set_si_code + force_sig_fault(TARGET_SIGFPE, si_code, env->iaoq_f); break; case EXCP_BREAK: force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->iaoq_f); From 6966e0ba1d4a9e6fe76f618c2c9c73900107fdc8 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 17 May 2025 13:27:48 +0200 Subject: [PATCH 059/136] target/hppa: Fix FPE exceptions Implement FP exception register #1 (lower 32-bits of 64-bit fr[0]). A proper implementation is necessary to allow the Linux kernel in system mode and the qemu linux-user to send proper si_code values on SIGFPE signal. Always set the T-bit on taken exception, and merge over- and underflow in system mode to just set overflow bit to mimic the behaviour I tested on a physical machine. The test program below can be used to verify correct behaviour. Note that behaviour on SIGFPE may vary on different platforms. 
The program should always detect the correct signal, but it may or may not be able to successfully continue afterwards. #define _GNU_SOURCE #include <signal.h> #include <stdio.h> #include <fenv.h> #include <float.h> static void fpe_func(int sig, siginfo_t *i, void *v) { sigset_t set; sigemptyset(&set); sigaddset(&set, SIGFPE); sigprocmask(SIG_UNBLOCK, &set, NULL); printf("GOT signal %d with si_code %ld\n", sig, i->si_code); } int main(int argc, char *argv[]) { struct sigaction action = { .sa_sigaction = fpe_func, .sa_flags = SA_RESTART|SA_SIGINFO }; sigaction(SIGFPE, &action, 0); feenableexcept(FE_OVERFLOW | FE_UNDERFLOW); double x = DBL_MIN; return printf("%lf\n", argc > 1 ? 1.7976931348623158E308*1.7976931348623158E308 : x / 10); } Signed-off-by: Helge Deller (cherry picked from commit ebd394948de4e868cb8fc5b265a8a18f0935dce1) Signed-off-by: Michael Tokarev --- target/hppa/fpu_helper.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c index a62d9d3083..294ce0a970 100644 --- a/target/hppa/fpu_helper.c +++ b/target/hppa/fpu_helper.c @@ -95,7 +95,8 @@ static void update_fr0_op(CPUHPPAState *env, uintptr_t ra) { uint32_t soft_exp = get_float_exception_flags(&env->fp_status); uint32_t hard_exp = 0; - uint32_t shadow = env->fr0_shadow; + uint32_t shadow = env->fr0_shadow & 0x3ffffff; + uint32_t fr1 = 0; if (likely(soft_exp == 0)) { env->fr[0] = (uint64_t)shadow << 32; @@ -108,9 +109,22 @@ static void update_fr0_op(CPUHPPAState *env, uintptr_t ra) hard_exp |= CONVERT_BIT(soft_exp, float_flag_overflow, R_FPSR_ENA_O_MASK); hard_exp |= CONVERT_BIT(soft_exp, float_flag_divbyzero, R_FPSR_ENA_Z_MASK); hard_exp |= CONVERT_BIT(soft_exp, float_flag_invalid, R_FPSR_ENA_V_MASK); - shadow |= hard_exp << (R_FPSR_FLAGS_SHIFT - R_FPSR_ENABLES_SHIFT); + if (hard_exp & shadow) { + shadow = FIELD_DP32(shadow, FPSR, T, 1); + /* fill exception register #1, which is lower 32-bits of fr[0] */ +#if !defined(CONFIG_USER_ONLY) + if (hard_exp & 
(R_FPSR_ENA_O_MASK | R_FPSR_ENA_U_MASK)) { + /* over- and underflow both set overflow flag only */ + fr1 = FIELD_DP32(fr1, FPSR, C, 1); + fr1 = FIELD_DP32(fr1, FPSR, FLG_O, 1); + } else +#endif + { + fr1 |= hard_exp << (R_FPSR_FLAGS_SHIFT - R_FPSR_ENABLES_SHIFT); + } + } env->fr0_shadow = shadow; - env->fr[0] = (uint64_t)shadow << 32; + env->fr[0] = (uint64_t)shadow << 32 | fr1; if (hard_exp & shadow) { hppa_dynamic_excp(env, EXCP_ASSIST, ra); From 3d5b2f81fbc6b96b70271af1820674731a441ff7 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 25 Apr 2025 14:07:10 +0200 Subject: [PATCH 060/136] Drop support for Python 3.8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Python 3.8 went "end of life" in October 2024 and Fedora 42 dropped this version already, so the "python" CI job is currently failing. Thus it's time to drop support for this Python version in QEMU, too. While we're at it, also look for "python3.13" in the configure script. Message-ID: <20250425120710.879518-1-thuth@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Daniel P. Berrangé Signed-off-by: Thomas Huth (cherry picked from commit d64db833d6e3cbe9ea5f36342480f920f3675cea) Signed-off-by: Michael Tokarev --- configure | 14 +++++++------- docs/about/build-platforms.rst | 2 +- python/Makefile | 8 ++++---- python/setup.cfg | 7 +++---- python/tests/minreqs.txt | 2 +- scripts/qapi/mypy.ini | 2 +- tests/docker/dockerfiles/python.docker | 1 - 7 files changed, 17 insertions(+), 19 deletions(-) diff --git a/configure b/configure index 02f1dd2311..a2f5597fa0 100755 --- a/configure +++ b/configure @@ -540,17 +540,17 @@ if test -n "$linux_arch" && ! test -d "$source_path/linux-headers/asm-$linux_arc fi check_py_version() { - # We require python >= 3.8. + # We require python >= 3.9. 
# NB: a True python conditional creates a non-zero return code (Failure) - "$1" -c 'import sys; sys.exit(sys.version_info < (3,8))' + "$1" -c 'import sys; sys.exit(sys.version_info < (3,9))' } first_python= if test -z "${PYTHON}"; then # A bare 'python' is traditionally python 2.x, but some distros # have it as python 3.x, so check in both places. - for binary in python3 python python3.12 python3.11 \ - python3.10 python3.9 python3.8; do + for binary in python3 python python3.13 python3.12 python3.11 \ + python3.10 python3.9 ; do if has "$binary"; then python=$(command -v "$binary") if check_py_version "$python"; then @@ -933,7 +933,7 @@ then # If first_python is set, there was a binary somewhere even though # it was not suitable. Use it for the error message. if test -n "$first_python"; then - error_exit "Cannot use '$first_python', Python >= 3.8 is required." \ + error_exit "Cannot use '$first_python', Python >= 3.9 is required." \ "Use --python=/path/to/python to specify a supported Python." else error_exit "Python not found. Use --python=/path/to/python" @@ -941,11 +941,11 @@ then fi if ! check_py_version "$python"; then - error_exit "Cannot use '$python', Python >= 3.8 is required." \ + error_exit "Cannot use '$python', Python >= 3.9 is required." \ "Use --python=/path/to/python to specify a supported Python." \ "Maybe try:" \ " openSUSE Leap 15.3+: zypper install python39" \ - " CentOS 8: dnf install python38" + " CentOS: dnf install python3.12" fi # Resolve PATH diff --git a/docs/about/build-platforms.rst b/docs/about/build-platforms.rst index 1552b1a704..c352a99544 100644 --- a/docs/about/build-platforms.rst +++ b/docs/about/build-platforms.rst @@ -101,7 +101,7 @@ Python runtime option of the ``configure`` script to point QEMU to a supported version of the Python runtime. - As of QEMU |version|, the minimum supported version of Python is 3.8. + As of QEMU |version|, the minimum supported version of Python is 3.9. 
Python build dependencies Some of QEMU's build dependencies are written in Python. Usually these diff --git a/python/Makefile b/python/Makefile index 1fa4ba2498..764b79ccb2 100644 --- a/python/Makefile +++ b/python/Makefile @@ -9,13 +9,13 @@ help: @echo "make check-minreqs:" @echo " Run tests in the minreqs virtual environment." @echo " These tests use the oldest dependencies." - @echo " Requires: Python 3.8" - @echo " Hint (Fedora): 'sudo dnf install python3.8'" + @echo " Requires: Python 3.9" + @echo " Hint (Fedora): 'sudo dnf install python3.9'" @echo "" @echo "make check-tox:" @echo " Run tests against multiple python versions." @echo " These tests use the newest dependencies." - @echo " Requires: Python 3.8 - 3.11, and tox." + @echo " Requires: Python 3.9 - 3.11, and tox." @echo " Hint (Fedora): 'sudo dnf install python3-tox python3.11'" @echo " The variable QEMU_TOX_EXTRA_ARGS can be use to pass extra" @echo " arguments to tox". @@ -59,7 +59,7 @@ PIP_INSTALL = pip install --disable-pip-version-check min-venv: $(QEMU_MINVENV_DIR) $(QEMU_MINVENV_DIR)/bin/activate $(QEMU_MINVENV_DIR) $(QEMU_MINVENV_DIR)/bin/activate: setup.cfg tests/minreqs.txt @echo "VENV $(QEMU_MINVENV_DIR)" - @python3.8 -m venv $(QEMU_MINVENV_DIR) + @python3.9 -m venv $(QEMU_MINVENV_DIR) @( \ echo "ACTIVATE $(QEMU_MINVENV_DIR)"; \ . 
$(QEMU_MINVENV_DIR)/bin/activate; \ diff --git a/python/setup.cfg b/python/setup.cfg index cf5af7e664..c48dff280a 100644 --- a/python/setup.cfg +++ b/python/setup.cfg @@ -14,7 +14,6 @@ classifiers = Natural Language :: English Operating System :: OS Independent Programming Language :: Python :: 3 :: Only - Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 @@ -23,7 +22,7 @@ classifiers = Typing :: Typed [options] -python_requires = >= 3.8 +python_requires = >= 3.9 packages = qemu.qmp qemu.machine @@ -78,7 +77,7 @@ exclude = __pycache__, [mypy] strict = True -python_version = 3.8 +python_version = 3.9 warn_unused_configs = True namespace_packages = True warn_unused_ignores = False @@ -186,7 +185,7 @@ multi_line_output=3 # of python available on your system to run this test. [tox:tox] -envlist = py38, py39, py310, py311, py312, py313 +envlist = py39, py310, py311, py312, py313 skip_missing_interpreters = true [testenv] diff --git a/python/tests/minreqs.txt b/python/tests/minreqs.txt index a3f423efd8..6445407ba8 100644 --- a/python/tests/minreqs.txt +++ b/python/tests/minreqs.txt @@ -1,5 +1,5 @@ # This file lists the ***oldest possible dependencies*** needed to run -# "make check" successfully under ***Python 3.8***. It is used primarily +# "make check" successfully under ***Python 3.9***. It is used primarily # by GitLab CI to ensure that our stated minimum versions in setup.cfg # are truthful and regularly validated. 
# diff --git a/scripts/qapi/mypy.ini b/scripts/qapi/mypy.ini index 8109470a03..c9dbcec2db 100644 --- a/scripts/qapi/mypy.ini +++ b/scripts/qapi/mypy.ini @@ -1,4 +1,4 @@ [mypy] strict = True disallow_untyped_calls = False -python_version = 3.8 +python_version = 3.9 diff --git a/tests/docker/dockerfiles/python.docker b/tests/docker/dockerfiles/python.docker index 8f0af9ef25..59e70a0248 100644 --- a/tests/docker/dockerfiles/python.docker +++ b/tests/docker/dockerfiles/python.docker @@ -15,7 +15,6 @@ ENV PACKAGES \ python3.11 \ python3.12 \ python3.13 \ - python3.8 \ python3.9 RUN dnf install -y $PACKAGES From fd0b1a5b13aa80d71e2d604e98c533a248cede9a Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Mon, 26 May 2025 10:33:49 +0300 Subject: [PATCH 061/136] Update version for 10.0.1 release Signed-off-by: Michael Tokarev --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index a13e7b9c87..1532420512 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -10.0.0 +10.0.1 From 6f7a53e58e0e0be8e2c35dabbf56f51582ece8e6 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Wed, 28 May 2025 10:49:31 +0300 Subject: [PATCH 062/136] Revert "Drop support for Python 3.8" This reverts commit 3d5b2f81fbc6b96b70271af1820674731a441ff7. It should not be in 10.0.x branch, since we do support python 3.8 in 10.0. Signed-off-by: Michael Tokarev --- configure | 14 +++++++------- docs/about/build-platforms.rst | 2 +- python/Makefile | 8 ++++---- python/setup.cfg | 7 ++++--- python/tests/minreqs.txt | 2 +- scripts/qapi/mypy.ini | 2 +- tests/docker/dockerfiles/python.docker | 1 + 7 files changed, 19 insertions(+), 17 deletions(-) diff --git a/configure b/configure index a2f5597fa0..02f1dd2311 100755 --- a/configure +++ b/configure @@ -540,17 +540,17 @@ if test -n "$linux_arch" && ! test -d "$source_path/linux-headers/asm-$linux_arc fi check_py_version() { - # We require python >= 3.9. + # We require python >= 3.8. 
# NB: a True python conditional creates a non-zero return code (Failure) - "$1" -c 'import sys; sys.exit(sys.version_info < (3,9))' + "$1" -c 'import sys; sys.exit(sys.version_info < (3,8))' } first_python= if test -z "${PYTHON}"; then # A bare 'python' is traditionally python 2.x, but some distros # have it as python 3.x, so check in both places. - for binary in python3 python python3.13 python3.12 python3.11 \ - python3.10 python3.9 ; do + for binary in python3 python python3.12 python3.11 \ + python3.10 python3.9 python3.8; do if has "$binary"; then python=$(command -v "$binary") if check_py_version "$python"; then @@ -933,7 +933,7 @@ then # If first_python is set, there was a binary somewhere even though # it was not suitable. Use it for the error message. if test -n "$first_python"; then - error_exit "Cannot use '$first_python', Python >= 3.9 is required." \ + error_exit "Cannot use '$first_python', Python >= 3.8 is required." \ "Use --python=/path/to/python to specify a supported Python." else error_exit "Python not found. Use --python=/path/to/python" @@ -941,11 +941,11 @@ then fi if ! check_py_version "$python"; then - error_exit "Cannot use '$python', Python >= 3.9 is required." \ + error_exit "Cannot use '$python', Python >= 3.8 is required." \ "Use --python=/path/to/python to specify a supported Python." \ "Maybe try:" \ " openSUSE Leap 15.3+: zypper install python39" \ - " CentOS: dnf install python3.12" + " CentOS 8: dnf install python38" fi # Resolve PATH diff --git a/docs/about/build-platforms.rst b/docs/about/build-platforms.rst index c352a99544..1552b1a704 100644 --- a/docs/about/build-platforms.rst +++ b/docs/about/build-platforms.rst @@ -101,7 +101,7 @@ Python runtime option of the ``configure`` script to point QEMU to a supported version of the Python runtime. - As of QEMU |version|, the minimum supported version of Python is 3.9. + As of QEMU |version|, the minimum supported version of Python is 3.8. 
Python build dependencies Some of QEMU's build dependencies are written in Python. Usually these diff --git a/python/Makefile b/python/Makefile index 764b79ccb2..1fa4ba2498 100644 --- a/python/Makefile +++ b/python/Makefile @@ -9,13 +9,13 @@ help: @echo "make check-minreqs:" @echo " Run tests in the minreqs virtual environment." @echo " These tests use the oldest dependencies." - @echo " Requires: Python 3.9" - @echo " Hint (Fedora): 'sudo dnf install python3.9'" + @echo " Requires: Python 3.8" + @echo " Hint (Fedora): 'sudo dnf install python3.8'" @echo "" @echo "make check-tox:" @echo " Run tests against multiple python versions." @echo " These tests use the newest dependencies." - @echo " Requires: Python 3.9 - 3.11, and tox." + @echo " Requires: Python 3.8 - 3.11, and tox." @echo " Hint (Fedora): 'sudo dnf install python3-tox python3.11'" @echo " The variable QEMU_TOX_EXTRA_ARGS can be use to pass extra" @echo " arguments to tox". @@ -59,7 +59,7 @@ PIP_INSTALL = pip install --disable-pip-version-check min-venv: $(QEMU_MINVENV_DIR) $(QEMU_MINVENV_DIR)/bin/activate $(QEMU_MINVENV_DIR) $(QEMU_MINVENV_DIR)/bin/activate: setup.cfg tests/minreqs.txt @echo "VENV $(QEMU_MINVENV_DIR)" - @python3.9 -m venv $(QEMU_MINVENV_DIR) + @python3.8 -m venv $(QEMU_MINVENV_DIR) @( \ echo "ACTIVATE $(QEMU_MINVENV_DIR)"; \ . 
$(QEMU_MINVENV_DIR)/bin/activate; \ diff --git a/python/setup.cfg b/python/setup.cfg index c48dff280a..cf5af7e664 100644 --- a/python/setup.cfg +++ b/python/setup.cfg @@ -14,6 +14,7 @@ classifiers = Natural Language :: English Operating System :: OS Independent Programming Language :: Python :: 3 :: Only + Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 @@ -22,7 +23,7 @@ classifiers = Typing :: Typed [options] -python_requires = >= 3.9 +python_requires = >= 3.8 packages = qemu.qmp qemu.machine @@ -77,7 +78,7 @@ exclude = __pycache__, [mypy] strict = True -python_version = 3.9 +python_version = 3.8 warn_unused_configs = True namespace_packages = True warn_unused_ignores = False @@ -185,7 +186,7 @@ multi_line_output=3 # of python available on your system to run this test. [tox:tox] -envlist = py39, py310, py311, py312, py313 +envlist = py38, py39, py310, py311, py312, py313 skip_missing_interpreters = true [testenv] diff --git a/python/tests/minreqs.txt b/python/tests/minreqs.txt index 6445407ba8..a3f423efd8 100644 --- a/python/tests/minreqs.txt +++ b/python/tests/minreqs.txt @@ -1,5 +1,5 @@ # This file lists the ***oldest possible dependencies*** needed to run -# "make check" successfully under ***Python 3.9***. It is used primarily +# "make check" successfully under ***Python 3.8***. It is used primarily # by GitLab CI to ensure that our stated minimum versions in setup.cfg # are truthful and regularly validated. 
# diff --git a/scripts/qapi/mypy.ini b/scripts/qapi/mypy.ini index c9dbcec2db..8109470a03 100644 --- a/scripts/qapi/mypy.ini +++ b/scripts/qapi/mypy.ini @@ -1,4 +1,4 @@ [mypy] strict = True disallow_untyped_calls = False -python_version = 3.9 +python_version = 3.8 diff --git a/tests/docker/dockerfiles/python.docker b/tests/docker/dockerfiles/python.docker index 59e70a0248..8f0af9ef25 100644 --- a/tests/docker/dockerfiles/python.docker +++ b/tests/docker/dockerfiles/python.docker @@ -15,6 +15,7 @@ ENV PACKAGES \ python3.11 \ python3.12 \ python3.13 \ + python3.8 \ python3.9 RUN dnf install -y $PACKAGES From ff3419cbacdc9ad0715c716afeed65bb21a2bbbc Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Wed, 28 May 2025 10:55:26 +0300 Subject: [PATCH 063/136] Update version for 10.0.2 release Signed-off-by: Michael Tokarev --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 1532420512..7e2df97fe5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -10.0.1 +10.0.2 From c9afc732e22b0d7443f7774b7479c28c882c0b30 Mon Sep 17 00:00:00 2001 From: Jamin Lin Date: Thu, 15 May 2025 16:09:35 +0800 Subject: [PATCH 064/136] hw/misc/aspeed_hace: Ensure HASH_IRQ is always set to prevent firmware hang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, if the program encounters an unsupported algorithm, it does not set the HASH_IRQ bit in the status register and send an interrupt to indicate command completion. As a result, the FW gets stuck waiting for a completion signal from the HACE module. Additionally, in do_hash_operation, if an error occurs within the conditional statement, the HASH_IRQ bit is not set in the status register. This causes the firmware to continuously send HASH commands, as it is unaware that the HACE model has completed processing the command. 
To fix this, the HASH_IRQ bit in the status register must always be set to ensure that the firmware receives an interrupt from the HACE module, preventing it from getting stuck or repeatedly sending HASH commands. Signed-off-by: Jamin Lin Fixes: c5475b3 ("hw: Model ASPEED's Hash and Crypto Engine") Reviewed-by: Cédric Le Goater Link: https://lore.kernel.org/qemu-devel/20250515081008.583578-4-jamin_lin@aspeedtech.com Signed-off-by: Cédric Le Goater (cherry picked from commit fb8e59abbe46957cd599bb9aa9221fad1e4e989e) Signed-off-by: Michael Tokarev --- hw/misc/aspeed_hace.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/hw/misc/aspeed_hace.c b/hw/misc/aspeed_hace.c index d75da33353..96997a03fc 100644 --- a/hw/misc/aspeed_hace.c +++ b/hw/misc/aspeed_hace.c @@ -301,12 +301,6 @@ static void do_hash_operation(AspeedHACEState *s, int algo, bool sg_mode, iov[i - 1].iov_len, false, iov[i - 1].iov_len); } - - /* - * Set status bits to indicate completion. Testing shows hardware sets - * these irrespective of HASH_IRQ_EN. - */ - s->regs[R_STATUS] |= HASH_IRQ; } static uint64_t aspeed_hace_read(void *opaque, hwaddr addr, unsigned int size) @@ -390,10 +384,16 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data, qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid hash algorithm selection 0x%"PRIx64"\n", __func__, data & ahc->hash_mask); - break; + } else { + do_hash_operation(s, algo, data & HASH_SG_EN, + ((data & HASH_HMAC_MASK) == HASH_DIGEST_ACCUM)); } - do_hash_operation(s, algo, data & HASH_SG_EN, - ((data & HASH_HMAC_MASK) == HASH_DIGEST_ACCUM)); + + /* + * Set status bits to indicate completion. Testing shows hardware sets + * these irrespective of HASH_IRQ_EN. 
+ */ + s->regs[R_STATUS] |= HASH_IRQ; if (data & HASH_IRQ_EN) { qemu_irq_raise(s->irq); From 80a03b51b2a679e944c351eadbb391abc2f196e6 Mon Sep 17 00:00:00 2001 From: Jamin Lin Date: Thu, 22 May 2025 10:33:04 +0800 Subject: [PATCH 065/136] hw/arm/aspeed_ast27x0: Fix RAM size detection failure on BE hosts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On big-endian hosts, the aspeed_ram_capacity_write() function previously passed the address of a 64-bit "data" variable directly to address_space_write(), assuming host and guest endianness matched. However, the data is expected to be written in little-endian format to DRAM. On big-endian hosts, this led to incorrect data being written into DRAM, which caused the guest firmware to misdetect the DRAM size. As a result, U-Boot fails to boot and hangs. - Replaces the "address_space_write()" call with "address_space_stl_le()", which performs an explicit 32-bit little-endian write. - Updating the MemoryRegionOps to restrict access to exactly 4 bytes using .valid.{min,max}_access_size = 4 and .impl.min_access_size = 4. Signed-off-by: Jamin Lin Fixes: 7436db1 ("aspeed/soc: fix incorrect dram size for AST2700") Reviewed-by: Cédric Le Goater Link: https://lore.kernel.org/qemu-devel/20250522023305.2486536-4-jamin_lin@aspeedtech.com Signed-off-by: Cédric Le Goater (cherry picked from commit e6941ac106190490d8b455eedc5b368e6d94d4cc) Signed-off-by: Michael Tokarev --- hw/arm/aspeed_ast27x0.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/hw/arm/aspeed_ast27x0.c b/hw/arm/aspeed_ast27x0.c index dce7255a2c..b810891b16 100644 --- a/hw/arm/aspeed_ast27x0.c +++ b/hw/arm/aspeed_ast27x0.c @@ -325,8 +325,9 @@ static void aspeed_ram_capacity_write(void *opaque, hwaddr addr, uint64_t data, * If writes the data to the address which is beyond the ram size, * it would write the data to the "address % ram_size". 
*/ - result = address_space_write(&s->dram_as, addr % ram_size, - MEMTXATTRS_UNSPECIFIED, &data, 4); + address_space_stl_le(&s->dram_as, addr % ram_size, data, + MEMTXATTRS_UNSPECIFIED, &result); + if (result != MEMTX_OK) { qemu_log_mask(LOG_GUEST_ERROR, "%s: DRAM write failed, addr:0x%" HWADDR_PRIx @@ -339,9 +340,10 @@ static const MemoryRegionOps aspeed_ram_capacity_ops = { .read = aspeed_ram_capacity_read, .write = aspeed_ram_capacity_write, .endianness = DEVICE_LITTLE_ENDIAN, + .impl.min_access_size = 4, .valid = { - .min_access_size = 1, - .max_access_size = 8, + .min_access_size = 4, + .max_access_size = 4, }, }; From 6eea0cd5114ffc491a8bf73fd682676638f037e2 Mon Sep 17 00:00:00 2001 From: Weifeng Liu Date: Sun, 11 May 2025 15:33:11 +0800 Subject: [PATCH 066/136] ui/gtk: Document scale and coordinate handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existence of multiple scaling factors forces us to deal with various coordinate systems and this would be confusing. It would be beneficial to define the concepts clearly and use consistent representation for variables in different coordinates. Signed-off-by: Weifeng Liu Message-ID: <20250511073337.876650-2-weifeng.liu.z@gmail.com> Acked-by: Gerd Hoffmann Acked-by: Marc-André Lureau (cherry picked from commit 9498e2f7e1a247557cfa0f830a86c398a23c6809) Signed-off-by: Michael Tokarev --- ui/gtk.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/ui/gtk.c b/ui/gtk.c index 59bda83da6..582841e031 100644 --- a/ui/gtk.c +++ b/ui/gtk.c @@ -800,6 +800,71 @@ void gd_update_monitor_refresh_rate(VirtualConsole *vc, GtkWidget *widget) #endif } +/** + * DOC: Coordinate handling. + * + * We are coping with sizes and positions in various coordinates and the + * handling of these coordinates is somewhat confusing. It would benefit us + * all if we define these coordinates explicitly and clearly. 
Besides, it's + * also helpful to follow the same naming convention for variables + * representing values in different coordinates. + * + * I. Definitions + * + * - (guest) buffer coordinate: this is the coordinates that the guest will + * see. The x/y offsets and width/height specified in commands sent by + * guest is basically in buffer coordinate. + * + * - (host) pixel coordinate: this is the coordinate in pixel level on the + * host desktop. A window/widget of width 300 in pixel coordinate means it + * occupies 300 pixels horizontally. + * + * - (host) logical window coordinate: the existence of global scaling + * factor in desktop level makes this kind of coordinate play a role. It + * always holds that (logical window size) * (global scale factor) = + * (pixel size). + * + * - global scale factor: this is specified in desktop level and is + * typically invariant during the life cycle of the process. Users with + * high-DPI monitors might set this scale, for example, to 2, in order to + * make the UI look larger. + * + * - zooming scale: this can be freely controlled by the QEMU user to zoom + * in/out the guest content. + * + * II. Representation + * + * We'd like to use consistent representation for variables in different + * coordinates: + * - buffer coordinate: prefix fb + * - pixel coordinate: prefix p + * - logical window coordinate: prefix w + * + * For scales: + * - global scale factor: prefix gs + * - zooming scale: prefix scale/s + * + * Example: fbw, pw, ww for width in different coordinates + * + * III. 
Equation + * + * - fbw * gs * scale_x = pw + * - pw = gs * ww + * + * Consequently we have + * + * - fbw * scale_x = ww + * + * Example: assuming we are running QEMU on a 3840x2160 screen and have set + * global scaling factor to 2, if the guest buffer size is 1920x1080 and the + * zooming scale is 0.5, then we have: + * - fbw = 1920, fbh = 1080 + * - pw = 1920, ph = 1080 + * - ww = 960, wh = 540 + * A bonus of this configuration is that we can achieve pixel to pixel + * presentation of the guest content. + */ + static gboolean gd_draw_event(GtkWidget *widget, cairo_t *cr, void *opaque) { VirtualConsole *vc = opaque; From 91fec603bdfb07b2339fd14c31abddb608974bb7 Mon Sep 17 00:00:00 2001 From: Weifeng Liu Date: Sun, 11 May 2025 15:33:12 +0800 Subject: [PATCH 067/136] ui/gtk: Use consistent naming for variables in different coordinates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we've documented definitions and presentation of various coordinates, let's enforce the rules. 
Signed-off-by: Weifeng Liu Message-ID: <20250511073337.876650-3-weifeng.liu.z@gmail.com> Acked-by: Gerd Hoffmann Acked-by: Marc-André Lureau (cherry picked from commit 3a6b314409b42fe7c46c2bd80cfc2a6744d414fe) Signed-off-by: Michael Tokarev --- ui/gtk-egl.c | 12 +++-- ui/gtk-gl-area.c | 14 ++--- ui/gtk.c | 133 ++++++++++++++++++++++++----------------------- 3 files changed, 82 insertions(+), 77 deletions(-) diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c index f7a428c86a..947c99334b 100644 --- a/ui/gtk-egl.c +++ b/ui/gtk-egl.c @@ -70,16 +70,18 @@ void gd_egl_draw(VirtualConsole *vc) QemuDmaBuf *dmabuf = vc->gfx.guest_fb.dmabuf; int fence_fd; #endif - int ww, wh, ws; + int ww, wh, pw, ph, gs; if (!vc->gfx.gls) { return; } window = gtk_widget_get_window(vc->gfx.drawing_area); - ws = gdk_window_get_scale_factor(window); - ww = gdk_window_get_width(window) * ws; - wh = gdk_window_get_height(window) * ws; + gs = gdk_window_get_scale_factor(window); + ww = gdk_window_get_width(window); + wh = gdk_window_get_height(window); + pw = ww * gs; + ph = wh * gs; if (vc->gfx.scanout_mode) { #ifdef CONFIG_GBM @@ -115,7 +117,7 @@ void gd_egl_draw(VirtualConsole *vc) eglMakeCurrent(qemu_egl_display, vc->gfx.esurface, vc->gfx.esurface, vc->gfx.ectx); - surface_gl_setup_viewport(vc->gfx.gls, vc->gfx.ds, ww, wh); + surface_gl_setup_viewport(vc->gfx.gls, vc->gfx.ds, pw, ph); surface_gl_render_texture(vc->gfx.gls, vc->gfx.ds); eglSwapBuffers(qemu_egl_display, vc->gfx.esurface); diff --git a/ui/gtk-gl-area.c b/ui/gtk-gl-area.c index 2c9a0db425..ba9fbec432 100644 --- a/ui/gtk-gl-area.c +++ b/ui/gtk-gl-area.c @@ -42,16 +42,16 @@ void gd_gl_area_draw(VirtualConsole *vc) #ifdef CONFIG_GBM QemuDmaBuf *dmabuf = vc->gfx.guest_fb.dmabuf; #endif - int ww, wh, ws, y1, y2; + int pw, ph, gs, y1, y2; if (!vc->gfx.gls) { return; } gtk_gl_area_make_current(GTK_GL_AREA(vc->gfx.drawing_area)); - ws = gdk_window_get_scale_factor(gtk_widget_get_window(vc->gfx.drawing_area)); - ww = 
gtk_widget_get_allocated_width(vc->gfx.drawing_area) * ws; - wh = gtk_widget_get_allocated_height(vc->gfx.drawing_area) * ws; + gs = gdk_window_get_scale_factor(gtk_widget_get_window(vc->gfx.drawing_area)); + pw = gtk_widget_get_allocated_width(vc->gfx.drawing_area) * gs; + ph = gtk_widget_get_allocated_height(vc->gfx.drawing_area) * gs; if (vc->gfx.scanout_mode) { if (!vc->gfx.guest_fb.framebuffer) { @@ -71,11 +71,11 @@ void gd_gl_area_draw(VirtualConsole *vc) glBindFramebuffer(GL_READ_FRAMEBUFFER, vc->gfx.guest_fb.framebuffer); /* GtkGLArea sets GL_DRAW_FRAMEBUFFER for us */ - glViewport(0, 0, ww, wh); + glViewport(0, 0, pw, ph); y1 = vc->gfx.y0_top ? 0 : vc->gfx.h; y2 = vc->gfx.y0_top ? vc->gfx.h : 0; glBlitFramebuffer(0, y1, vc->gfx.w, y2, - 0, 0, ww, wh, + 0, 0, pw, ph, GL_COLOR_BUFFER_BIT, GL_NEAREST); #ifdef CONFIG_GBM if (dmabuf) { @@ -101,7 +101,7 @@ void gd_gl_area_draw(VirtualConsole *vc) } gtk_gl_area_make_current(GTK_GL_AREA(vc->gfx.drawing_area)); - surface_gl_setup_viewport(vc->gfx.gls, vc->gfx.ds, ww, wh); + surface_gl_setup_viewport(vc->gfx.gls, vc->gfx.ds, pw, ph); surface_gl_render_texture(vc->gfx.gls, vc->gfx.ds); } } diff --git a/ui/gtk.c b/ui/gtk.c index 582841e031..956d4ab9d1 100644 --- a/ui/gtk.c +++ b/ui/gtk.c @@ -387,16 +387,16 @@ static void *gd_win32_get_hwnd(VirtualConsole *vc) /** DisplayState Callbacks **/ static void gd_update(DisplayChangeListener *dcl, - int x, int y, int w, int h) + int fbx, int fby, int fbw, int fbh) { VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl); GdkWindow *win; - int x1, x2, y1, y2; - int mx, my; - int fbw, fbh; - int ww, wh; + int wx1, wx2, wy1, wy2; + int wx_offset, wy_offset; + int ww_surface, wh_surface; + int ww_widget, wh_widget; - trace_gd_update(vc->label, x, y, w, h); + trace_gd_update(vc->label, fbx, fby, fbw, fbh); if (!gtk_widget_get_realized(vc->gfx.drawing_area)) { return; @@ -405,35 +405,36 @@ static void gd_update(DisplayChangeListener *dcl, if (vc->gfx.convert) { 
pixman_image_composite(PIXMAN_OP_SRC, vc->gfx.ds->image, NULL, vc->gfx.convert, - x, y, 0, 0, x, y, w, h); + fbx, fby, 0, 0, fbx, fby, fbw, fbh); } - x1 = floor(x * vc->gfx.scale_x); - y1 = floor(y * vc->gfx.scale_y); + wx1 = floor(fbx * vc->gfx.scale_x); + wy1 = floor(fby * vc->gfx.scale_y); - x2 = ceil(x * vc->gfx.scale_x + w * vc->gfx.scale_x); - y2 = ceil(y * vc->gfx.scale_y + h * vc->gfx.scale_y); + wx2 = ceil(fbx * vc->gfx.scale_x + fbw * vc->gfx.scale_x); + wy2 = ceil(fby * vc->gfx.scale_y + fbh * vc->gfx.scale_y); - fbw = surface_width(vc->gfx.ds) * vc->gfx.scale_x; - fbh = surface_height(vc->gfx.ds) * vc->gfx.scale_y; + ww_surface = surface_width(vc->gfx.ds) * vc->gfx.scale_x; + wh_surface = surface_height(vc->gfx.ds) * vc->gfx.scale_y; win = gtk_widget_get_window(vc->gfx.drawing_area); if (!win) { return; } - ww = gdk_window_get_width(win); - wh = gdk_window_get_height(win); + ww_widget = gdk_window_get_width(win); + wh_widget = gdk_window_get_height(win); - mx = my = 0; - if (ww > fbw) { - mx = (ww - fbw) / 2; + wx_offset = wy_offset = 0; + if (ww_widget > ww_surface) { + wx_offset = (ww_widget - ww_surface) / 2; } - if (wh > fbh) { - my = (wh - fbh) / 2; + if (wh_widget > wh_surface) { + wy_offset = (wh_widget - wh_surface) / 2; } gtk_widget_queue_draw_area(vc->gfx.drawing_area, - mx + x1, my + y1, (x2 - x1), (y2 - y1)); + wx_offset + wx1, wy_offset + wy1, + (wx2 - wx1), (wy2 - wy1)); } static void gd_refresh(DisplayChangeListener *dcl) @@ -869,8 +870,8 @@ static gboolean gd_draw_event(GtkWidget *widget, cairo_t *cr, void *opaque) { VirtualConsole *vc = opaque; GtkDisplayState *s = vc->s; - int mx, my; - int ww, wh; + int wx_offset, wy_offset; + int ww_widget, wh_widget, ww_surface, wh_surface; int fbw, fbh; #if defined(CONFIG_OPENGL) @@ -904,46 +905,47 @@ static gboolean gd_draw_event(GtkWidget *widget, cairo_t *cr, void *opaque) fbw = surface_width(vc->gfx.ds); fbh = surface_height(vc->gfx.ds); - ww = 
gdk_window_get_width(gtk_widget_get_window(widget)); - wh = gdk_window_get_height(gtk_widget_get_window(widget)); + ww_widget = gdk_window_get_width(gtk_widget_get_window(widget)); + wh_widget = gdk_window_get_height(gtk_widget_get_window(widget)); if (s->full_screen) { - vc->gfx.scale_x = (double)ww / fbw; - vc->gfx.scale_y = (double)wh / fbh; + vc->gfx.scale_x = (double)ww_widget / fbw; + vc->gfx.scale_y = (double)wh_widget / fbh; } else if (s->free_scale) { double sx, sy; - sx = (double)ww / fbw; - sy = (double)wh / fbh; + sx = (double)ww_widget / fbw; + sy = (double)wh_widget / fbh; vc->gfx.scale_x = vc->gfx.scale_y = MIN(sx, sy); } - fbw *= vc->gfx.scale_x; - fbh *= vc->gfx.scale_y; + ww_surface = fbw * vc->gfx.scale_x; + wh_surface = fbh * vc->gfx.scale_y; - mx = my = 0; - if (ww > fbw) { - mx = (ww - fbw) / 2; + wx_offset = wy_offset = 0; + if (ww_widget > ww_surface) { + wx_offset = (ww_widget - ww_surface) / 2; } - if (wh > fbh) { - my = (wh - fbh) / 2; + if (wh_widget > wh_surface) { + wy_offset = (wh_widget - wh_surface) / 2; } - cairo_rectangle(cr, 0, 0, ww, wh); + cairo_rectangle(cr, 0, 0, ww_widget, wh_widget); /* Optionally cut out the inner area where the pixmap will be drawn. This avoids 'flashing' since we're not double-buffering. 
Note we're using the undocumented behaviour of drawing the rectangle from right to left to cut out the whole */ - cairo_rectangle(cr, mx + fbw, my, - -1 * fbw, fbh); + cairo_rectangle(cr, wx_offset + ww_surface, wy_offset, + -1 * ww_surface, wh_surface); cairo_fill(cr); cairo_scale(cr, vc->gfx.scale_x, vc->gfx.scale_y); cairo_set_source_surface(cr, vc->gfx.surface, - mx / vc->gfx.scale_x, my / vc->gfx.scale_y); + wx_offset / vc->gfx.scale_x, + wy_offset / vc->gfx.scale_y); cairo_paint(cr); return TRUE; @@ -954,19 +956,19 @@ static gboolean gd_motion_event(GtkWidget *widget, GdkEventMotion *motion, { VirtualConsole *vc = opaque; GtkDisplayState *s = vc->s; - int x, y; - int mx, my; - int fbh, fbw; - int ww, wh; + int fbx, fby; + int wx_offset, wy_offset; + int wh_surface, ww_surface; + int ww_widget, wh_widget; if (!vc->gfx.ds) { return TRUE; } - fbw = surface_width(vc->gfx.ds) * vc->gfx.scale_x; - fbh = surface_height(vc->gfx.ds) * vc->gfx.scale_y; - ww = gtk_widget_get_allocated_width(widget); - wh = gtk_widget_get_allocated_height(widget); + ww_surface = surface_width(vc->gfx.ds) * vc->gfx.scale_x; + wh_surface = surface_height(vc->gfx.ds) * vc->gfx.scale_y; + ww_widget = gtk_widget_get_allocated_width(widget); + wh_widget = gtk_widget_get_allocated_height(widget); /* * `widget` may not have the same size with the frame buffer. @@ -974,41 +976,42 @@ static gboolean gd_motion_event(GtkWidget *widget, GdkEventMotion *motion, * To achieve that, `vc` will be displayed at (mx, my) * so that it is displayed at the center of the widget. */ - mx = my = 0; - if (ww > fbw) { - mx = (ww - fbw) / 2; + wx_offset = wy_offset = 0; + if (ww_widget > ww_surface) { + wx_offset = (ww_widget - ww_surface) / 2; } - if (wh > fbh) { - my = (wh - fbh) / 2; + if (wh_widget > wh_surface) { + wy_offset = (wh_widget - wh_surface) / 2; } /* * `motion` is reported in `widget` coordinates * so translating it to the coordinates in `vc`. 
*/ - x = (motion->x - mx) / vc->gfx.scale_x; - y = (motion->y - my) / vc->gfx.scale_y; + fbx = (motion->x - wx_offset) / vc->gfx.scale_x; + fby = (motion->y - wy_offset) / vc->gfx.scale_y; - trace_gd_motion_event(ww, wh, gtk_widget_get_scale_factor(widget), x, y); + trace_gd_motion_event(ww_widget, wh_widget, + gtk_widget_get_scale_factor(widget), fbx, fby); if (qemu_input_is_absolute(vc->gfx.dcl.con)) { - if (x < 0 || y < 0 || - x >= surface_width(vc->gfx.ds) || - y >= surface_height(vc->gfx.ds)) { + if (fbx < 0 || fby < 0 || + fbx >= surface_width(vc->gfx.ds) || + fby >= surface_height(vc->gfx.ds)) { return TRUE; } - qemu_input_queue_abs(vc->gfx.dcl.con, INPUT_AXIS_X, x, + qemu_input_queue_abs(vc->gfx.dcl.con, INPUT_AXIS_X, fbx, 0, surface_width(vc->gfx.ds)); - qemu_input_queue_abs(vc->gfx.dcl.con, INPUT_AXIS_Y, y, + qemu_input_queue_abs(vc->gfx.dcl.con, INPUT_AXIS_Y, fby, 0, surface_height(vc->gfx.ds)); qemu_input_event_sync(); } else if (s->last_set && s->ptr_owner == vc) { - qemu_input_queue_rel(vc->gfx.dcl.con, INPUT_AXIS_X, x - s->last_x); - qemu_input_queue_rel(vc->gfx.dcl.con, INPUT_AXIS_Y, y - s->last_y); + qemu_input_queue_rel(vc->gfx.dcl.con, INPUT_AXIS_X, fbx - s->last_x); + qemu_input_queue_rel(vc->gfx.dcl.con, INPUT_AXIS_Y, fby - s->last_y); qemu_input_event_sync(); } - s->last_x = x; - s->last_y = y; + s->last_x = fbx; + s->last_y = fby; s->last_set = TRUE; if (!qemu_input_is_absolute(vc->gfx.dcl.con) && s->ptr_owner == vc) { From 43fecc5dde342ec9f04ee8a28d2b09f4cfe1450d Mon Sep 17 00:00:00 2001 From: Weifeng Liu Date: Sun, 11 May 2025 15:33:13 +0800 Subject: [PATCH 068/136] gtk/ui: Introduce helper gd_update_scale MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code snippet updating scale_x/scale_y is general and will be used in next patch. Make it a function. 
Signed-off-by: Weifeng Liu Message-ID: <20250511073337.876650-4-weifeng.liu.z@gmail.com> Acked-by: Gerd Hoffmann Acked-by: Marc-André Lureau (cherry picked from commit a19665448156f17b52b7f33e7960d57efcfca067) Signed-off-by: Michael Tokarev --- include/ui/gtk.h | 2 ++ ui/gtk.c | 30 +++++++++++++++++++----------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/include/ui/gtk.h b/include/ui/gtk.h index aa3d637029..d3944046db 100644 --- a/include/ui/gtk.h +++ b/include/ui/gtk.h @@ -224,4 +224,6 @@ int gd_gl_area_make_current(DisplayGLCtx *dgc, /* gtk-clipboard.c */ void gd_clipboard_init(GtkDisplayState *gd); +void gd_update_scale(VirtualConsole *vc, int ww, int wh, int fbw, int fbh); + #endif /* UI_GTK_H */ diff --git a/ui/gtk.c b/ui/gtk.c index 956d4ab9d1..ea3f403b02 100644 --- a/ui/gtk.c +++ b/ui/gtk.c @@ -801,6 +801,24 @@ void gd_update_monitor_refresh_rate(VirtualConsole *vc, GtkWidget *widget) #endif } +void gd_update_scale(VirtualConsole *vc, int ww, int wh, int fbw, int fbh) +{ + if (!vc) { + return; + } + + if (vc->s->full_screen) { + vc->gfx.scale_x = (double)ww / fbw; + vc->gfx.scale_y = (double)wh / fbh; + } else if (vc->s->free_scale) { + double sx, sy; + + sx = (double)ww / fbw; + sy = (double)wh / fbh; + + vc->gfx.scale_x = vc->gfx.scale_y = MIN(sx, sy); + } +} /** * DOC: Coordinate handling. 
* @@ -908,17 +926,7 @@ static gboolean gd_draw_event(GtkWidget *widget, cairo_t *cr, void *opaque) ww_widget = gdk_window_get_width(gtk_widget_get_window(widget)); wh_widget = gdk_window_get_height(gtk_widget_get_window(widget)); - if (s->full_screen) { - vc->gfx.scale_x = (double)ww_widget / fbw; - vc->gfx.scale_y = (double)wh_widget / fbh; - } else if (s->free_scale) { - double sx, sy; - - sx = (double)ww_widget / fbw; - sy = (double)wh_widget / fbh; - - vc->gfx.scale_x = vc->gfx.scale_y = MIN(sx, sy); - } + gd_update_scale(vc, ww_widget, wh_widget, fbw, fbh); ww_surface = fbw * vc->gfx.scale_x; wh_surface = fbh * vc->gfx.scale_y; From a5a805068a2ca0dabf71f05c56bdbdba7360f985 Mon Sep 17 00:00:00 2001 From: Weifeng Liu Date: Sun, 11 May 2025 15:33:14 +0800 Subject: [PATCH 069/136] ui/gtk: Update scales in fixed-scale mode when rendering GL area MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When gl=on, scale_x and scale_y were set to 1 on startup that didn't reflect the real situation of the scan-out in free scale mode, resulting in incorrect cursor coordinates to be sent when moving the mouse pointer. Simply updating the scales before rendering the image fixes this issue. 
Signed-off-by: Weifeng Liu Message-ID: <20250511073337.876650-5-weifeng.liu.z@gmail.com> Acked-by: Gerd Hoffmann Acked-by: Marc-André Lureau (cherry picked from commit 8fb072472c38cb1778c5b0bebf535a8b13533857) Signed-off-by: Michael Tokarev --- ui/gtk-gl-area.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/ui/gtk-gl-area.c b/ui/gtk-gl-area.c index ba9fbec432..db93cd6204 100644 --- a/ui/gtk-gl-area.c +++ b/ui/gtk-gl-area.c @@ -43,6 +43,8 @@ void gd_gl_area_draw(VirtualConsole *vc) QemuDmaBuf *dmabuf = vc->gfx.guest_fb.dmabuf; #endif int pw, ph, gs, y1, y2; + int ww, wh; + int fbw, fbh; if (!vc->gfx.gls) { return; @@ -50,8 +52,14 @@ void gd_gl_area_draw(VirtualConsole *vc) gtk_gl_area_make_current(GTK_GL_AREA(vc->gfx.drawing_area)); gs = gdk_window_get_scale_factor(gtk_widget_get_window(vc->gfx.drawing_area)); - pw = gtk_widget_get_allocated_width(vc->gfx.drawing_area) * gs; - ph = gtk_widget_get_allocated_height(vc->gfx.drawing_area) * gs; + fbw = surface_width(vc->gfx.ds); + fbh = surface_height(vc->gfx.ds); + ww = gtk_widget_get_allocated_width(vc->gfx.drawing_area); + wh = gtk_widget_get_allocated_height(vc->gfx.drawing_area); + pw = ww * gs; + ph = wh * gs; + + gd_update_scale(vc, ww, wh, fbw, fbh); if (vc->gfx.scanout_mode) { if (!vc->gfx.guest_fb.framebuffer) { From 613f05a4c653761550b1d5a5ff8993d1f8e22230 Mon Sep 17 00:00:00 2001 From: Weifeng Liu Date: Sun, 11 May 2025 15:33:15 +0800 Subject: [PATCH 070/136] ui/sdl: Consider scaling in mouse event handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using sdl display backend, if the window is scaled, incorrect mouse positions will be reported since scaling is not properly handled. Fix it by transforming the positions from window coordinate to guest buffer coordinate. 
Signed-off-by: Weifeng Liu Message-ID: <20250511073337.876650-6-weifeng.liu.z@gmail.com> Acked-by: Gerd Hoffmann Acked-by: Marc-André Lureau (cherry picked from commit 30aa105640b0a2a541744b6584d57c9a4b86debd) Signed-off-by: Michael Tokarev --- ui/sdl2.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/ui/sdl2.c b/ui/sdl2.c index cda4293a53..b00e421f7f 100644 --- a/ui/sdl2.c +++ b/ui/sdl2.c @@ -488,14 +488,14 @@ static void handle_mousemotion(SDL_Event *ev) { int max_x, max_y; struct sdl2_console *scon = get_scon_from_window(ev->motion.windowID); + int scr_w, scr_h, surf_w, surf_h, x, y, dx, dy; if (!scon || !qemu_console_is_graphic(scon->dcl.con)) { return; } + SDL_GetWindowSize(scon->real_window, &scr_w, &scr_h); if (qemu_input_is_absolute(scon->dcl.con) || absolute_enabled) { - int scr_w, scr_h; - SDL_GetWindowSize(scon->real_window, &scr_w, &scr_h); max_x = scr_w - 1; max_y = scr_h - 1; if (gui_grab && !gui_fullscreen @@ -509,9 +509,14 @@ static void handle_mousemotion(SDL_Event *ev) sdl_grab_start(scon); } } + surf_w = surface_width(scon->surface); + surf_h = surface_height(scon->surface); + x = (int64_t)ev->motion.x * surf_w / scr_w; + y = (int64_t)ev->motion.y * surf_h / scr_h; + dx = (int64_t)ev->motion.xrel * surf_w / scr_w; + dy = (int64_t)ev->motion.yrel * surf_h / scr_h; if (gui_grab || qemu_input_is_absolute(scon->dcl.con) || absolute_enabled) { - sdl_send_mouse_event(scon, ev->motion.xrel, ev->motion.yrel, - ev->motion.x, ev->motion.y, ev->motion.state); + sdl_send_mouse_event(scon, dx, dy, x, y, ev->motion.state); } } @@ -520,12 +525,17 @@ static void handle_mousebutton(SDL_Event *ev) int buttonstate = SDL_GetMouseState(NULL, NULL); SDL_MouseButtonEvent *bev; struct sdl2_console *scon = get_scon_from_window(ev->button.windowID); + int scr_w, scr_h, x, y; if (!scon || !qemu_console_is_graphic(scon->dcl.con)) { return; } bev = &ev->button; + SDL_GetWindowSize(scon->real_window, &scr_w, &scr_h); + x = 
(int64_t)bev->x * surface_width(scon->surface) / scr_w; + y = (int64_t)bev->y * surface_height(scon->surface) / scr_h; + if (!gui_grab && !qemu_input_is_absolute(scon->dcl.con)) { if (ev->type == SDL_MOUSEBUTTONUP && bev->button == SDL_BUTTON_LEFT) { /* start grabbing all events */ @@ -537,7 +547,7 @@ static void handle_mousebutton(SDL_Event *ev) } else { buttonstate &= ~SDL_BUTTON(bev->button); } - sdl_send_mouse_event(scon, 0, 0, bev->x, bev->y, buttonstate); + sdl_send_mouse_event(scon, 0, 0, x, y, buttonstate); } } From a610978bc55f0199344a5ba6bc6927f9fef08a11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Mon, 27 Jan 2025 18:45:47 +0000 Subject: [PATCH 071/136] ui/vnc.c: replace big endian flag with byte order value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will make it easier to do certain comparisons in future if we store G_BIG_ENDIAN/G_LITTLE_ENDIAN directly, instead of a boolean flag, as we can then compare directly to the G_BYTE_ORDER constant. Reviewed-by: BALATON Zoltan Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Daniel P. Berrangé (cherry picked from commit 7ed96710e82c385c6cfc3d064eec7dde20f0f3fd) Signed-off-by: Michael Tokarev --- ui/vnc-enc-tight.c | 2 +- ui/vnc-enc-zrle.c | 2 +- ui/vnc-jobs.c | 2 +- ui/vnc.c | 6 +++--- ui/vnc.h | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ui/vnc-enc-tight.c b/ui/vnc-enc-tight.c index 41f559eb83..f8aaa8f346 100644 --- a/ui/vnc-enc-tight.c +++ b/ui/vnc-enc-tight.c @@ -150,7 +150,7 @@ tight_detect_smooth_image24(VncState *vs, int w, int h) * If client is big-endian, color samples begin from the second * byte (offset 1) of a 32-bit pixel value. */ - off = vs->client_be; + off = vs->client_endian == G_BIG_ENDIAN ? 
1 : 0; memset(stats, 0, sizeof (stats)); diff --git a/ui/vnc-enc-zrle.c b/ui/vnc-enc-zrle.c index bd33b89063..97ec6c7119 100644 --- a/ui/vnc-enc-zrle.c +++ b/ui/vnc-enc-zrle.c @@ -255,7 +255,7 @@ static void zrle_write_u8(VncState *vs, uint8_t value) static int zrle_send_framebuffer_update(VncState *vs, int x, int y, int w, int h) { - bool be = vs->client_be; + bool be = vs->client_endian == G_BIG_ENDIAN; size_t bytes; int zywrle_level; diff --git a/ui/vnc-jobs.c b/ui/vnc-jobs.c index fcca7ec632..d3486af9e2 100644 --- a/ui/vnc-jobs.c +++ b/ui/vnc-jobs.c @@ -188,7 +188,7 @@ static void vnc_async_encoding_start(VncState *orig, VncState *local) local->lossy_rect = orig->lossy_rect; local->write_pixels = orig->write_pixels; local->client_pf = orig->client_pf; - local->client_be = orig->client_be; + local->client_endian = orig->client_endian; local->tight = orig->tight; local->zlib = orig->zlib; local->hextile = orig->hextile; diff --git a/ui/vnc.c b/ui/vnc.c index 9241caaad9..d47879f579 100644 --- a/ui/vnc.c +++ b/ui/vnc.c @@ -893,7 +893,7 @@ void vnc_convert_pixel(VncState *vs, uint8_t *buf, uint32_t v) buf[0] = v; break; case 2: - if (vs->client_be) { + if (vs->client_endian == G_BIG_ENDIAN) { buf[0] = v >> 8; buf[1] = v; } else { @@ -903,7 +903,7 @@ void vnc_convert_pixel(VncState *vs, uint8_t *buf, uint32_t v) break; default: case 4: - if (vs->client_be) { + if (vs->client_endian == G_BIG_ENDIAN) { buf[0] = v >> 24; buf[1] = v >> 16; buf[2] = v >> 8; @@ -2314,7 +2314,7 @@ static void set_pixel_format(VncState *vs, int bits_per_pixel, vs->client_pf.bits_per_pixel = bits_per_pixel; vs->client_pf.bytes_per_pixel = bits_per_pixel / 8; vs->client_pf.depth = bits_per_pixel == 32 ? 24 : bits_per_pixel; - vs->client_be = big_endian_flag; + vs->client_endian = big_endian_flag ? 
G_BIG_ENDIAN : G_LITTLE_ENDIAN; if (!true_color_flag) { send_color_map(vs); diff --git a/ui/vnc.h b/ui/vnc.h index acc53a2cc1..02613aa63a 100644 --- a/ui/vnc.h +++ b/ui/vnc.h @@ -323,7 +323,7 @@ struct VncState VncWritePixels *write_pixels; PixelFormat client_pf; pixman_format_code_t client_format; - bool client_be; + int client_endian; /* G_LITTLE_ENDIAN or G_BIG_ENDIAN */ CaptureVoiceOut *audio_cap; struct audsettings as; From 531cbd8e262d3e59059ea07a72d3aa71c9e94573 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Mon, 27 Jan 2025 18:48:50 +0000 Subject: [PATCH 072/136] ui/vnc: take account of client byte order in pixman format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The set_pixel_conversion() method is responsible for determining whether the VNC client pixel format matches the server format, and thus whether we can use the fast path "copy" impl for sending pixels, or must use the generic impl with bit swizzling. The VNC server format is set at build time to VNC_SERVER_FB_FORMAT, which corresponds to PIXMAN_x8r8g8b8. The qemu_pixman_get_format() method is then responsible for converting the VNC pixel format into a pixman format. The VNC client pixel shifts are relative to the associated endianness. The pixman formats are always relative to the host native endianness. The qemu_pixman_get_format() method does not take into account the VNC client endianness, and is thus returning a pixman format that is only valid when the host endianness matches that of the VNC client. This has been broken since pixman was introduced to the VNC server: commit 9f64916da20eea67121d544698676295bbb105a7 Author: Gerd Hoffmann Date: Wed Oct 10 13:29:43 2012 +0200 pixman/vnc: use pixman images in vnc. The flaw can be demonstrated using the Tigervnc client by using vncviewer -AutoSelect=0 -PreferredEncoding=raw server:display connecting from a LE client to a QEMU on a BE server, or the reverse.
The bug was masked, however, because almost all VNC clients will advertize support for the "tight" encoding and the QEMU VNC server will prefer "tight" if advertized. The tight_pack24 method is responsible for taking a set of pixels which have already been converted into client endianness and then repacking them into the TPIXEL format which the RFB spec defines as "TPIXEL is only 3 bytes long, where the first byte is the red component, the second byte is the green component, and the third byte is the blue component of the pixel color value" IOW, the TPIXEL format is fixed on the wire, regardless of what the VNC client declares as its endianness. Since the VNC pixel encoding code was failing to honour the endian flag of the client, the tight_pack24 method was always operating on data in native endianness. Its impl cancelled out the VNC pixel encoding bug. With the VNC pixel encoding code now fixed, the tight_pack24 method needs to take into account that it is operating on data in client endianness, not native endianness. It thus may need to invert the pixel shifts. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Daniel P.
Berrangé (cherry picked from commit 70097442853c389a765c9f6502d861d182b092ae) Signed-off-by: Michael Tokarev --- include/ui/qemu-pixman.h | 4 ++-- ui/qemu-pixman.c | 15 ++++++++------- ui/vnc-enc-tight.c | 2 +- ui/vnc.c | 3 ++- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/include/ui/qemu-pixman.h b/include/ui/qemu-pixman.h index 193bc046d1..2ca0ed7029 100644 --- a/include/ui/qemu-pixman.h +++ b/include/ui/qemu-pixman.h @@ -75,12 +75,12 @@ PixelFormat qemu_pixelformat_from_pixman(pixman_format_code_t format); pixman_format_code_t qemu_default_pixman_format(int bpp, bool native_endian); pixman_format_code_t qemu_drm_format_to_pixman(uint32_t drm_format); uint32_t qemu_pixman_to_drm_format(pixman_format_code_t pixman); -int qemu_pixman_get_type(int rshift, int gshift, int bshift); +int qemu_pixman_get_type(int rshift, int gshift, int bshift, int endian); bool qemu_pixman_check_format(DisplayChangeListener *dcl, pixman_format_code_t format); #ifdef CONFIG_PIXMAN -pixman_format_code_t qemu_pixman_get_format(PixelFormat *pf); +pixman_format_code_t qemu_pixman_get_format(PixelFormat *pf, int endian); pixman_image_t *qemu_pixman_linebuf_create(pixman_format_code_t format, int width); void qemu_pixman_linebuf_fill(pixman_image_t *linebuf, pixman_image_t *fb, diff --git a/ui/qemu-pixman.c b/ui/qemu-pixman.c index 6ef4376f4e..ef4e71da11 100644 --- a/ui/qemu-pixman.c +++ b/ui/qemu-pixman.c @@ -126,33 +126,34 @@ uint32_t qemu_pixman_to_drm_format(pixman_format_code_t pixman_format) return 0; } -int qemu_pixman_get_type(int rshift, int gshift, int bshift) +int qemu_pixman_get_type(int rshift, int gshift, int bshift, int endian) { int type = PIXMAN_TYPE_OTHER; + bool native_endian = (endian == G_BYTE_ORDER); if (rshift > gshift && gshift > bshift) { if (bshift == 0) { - type = PIXMAN_TYPE_ARGB; + type = native_endian ? PIXMAN_TYPE_ARGB : PIXMAN_TYPE_BGRA; } else { - type = PIXMAN_TYPE_RGBA; + type = native_endian ? 
PIXMAN_TYPE_RGBA : PIXMAN_TYPE_ABGR; } } else if (rshift < gshift && gshift < bshift) { if (rshift == 0) { - type = PIXMAN_TYPE_ABGR; + type = native_endian ? PIXMAN_TYPE_ABGR : PIXMAN_TYPE_RGBA; } else { - type = PIXMAN_TYPE_BGRA; + type = native_endian ? PIXMAN_TYPE_BGRA : PIXMAN_TYPE_ARGB; } } return type; } #ifdef CONFIG_PIXMAN -pixman_format_code_t qemu_pixman_get_format(PixelFormat *pf) +pixman_format_code_t qemu_pixman_get_format(PixelFormat *pf, int endian) { pixman_format_code_t format; int type; - type = qemu_pixman_get_type(pf->rshift, pf->gshift, pf->bshift); + type = qemu_pixman_get_type(pf->rshift, pf->gshift, pf->bshift, endian); format = PIXMAN_FORMAT(pf->bits_per_pixel, type, pf->abits, pf->rbits, pf->gbits, pf->bbits); if (!pixman_format_supported_source(format)) { diff --git a/ui/vnc-enc-tight.c b/ui/vnc-enc-tight.c index f8aaa8f346..a5bdc19ebb 100644 --- a/ui/vnc-enc-tight.c +++ b/ui/vnc-enc-tight.c @@ -891,7 +891,7 @@ static void tight_pack24(VncState *vs, uint8_t *buf, size_t count, size_t *ret) buf8 = buf; - if (1 /* FIXME */) { + if (vs->client_endian == G_BYTE_ORDER) { rshift = vs->client_pf.rshift; gshift = vs->client_pf.gshift; bshift = vs->client_pf.bshift; diff --git a/ui/vnc.c b/ui/vnc.c index d47879f579..c96bd8ceea 100644 --- a/ui/vnc.c +++ b/ui/vnc.c @@ -2242,7 +2242,8 @@ static void set_encodings(VncState *vs, int32_t *encodings, size_t n_encodings) static void set_pixel_conversion(VncState *vs) { - pixman_format_code_t fmt = qemu_pixman_get_format(&vs->client_pf); + pixman_format_code_t fmt = qemu_pixman_get_format(&vs->client_pf, + vs->client_endian); if (fmt == VNC_SERVER_FB_FORMAT) { vs->write_pixels = vnc_write_pixels_copy; From 9238fd846db22033cd12aca5736a25d69e1a5985 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Tue, 28 Jan 2025 13:27:25 +0000 Subject: [PATCH 073/136] ui/vnc: fix tight palette pixel encoding for 8/16-bpp formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit When sending a tight rectangle with the palette filter, if the client format was 8/16bpp, the colours on big endian hosts are not set as we're sending the wrong bytes. We must first cast the 32-bit colour to a 16/8-bit value, and then send the result. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Daniel P. Berrangé (cherry picked from commit 63d320909220a90647c484263ae5e2f26eb54587) Signed-off-by: Michael Tokarev --- ui/vnc-enc-tight.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/ui/vnc-enc-tight.c b/ui/vnc-enc-tight.c index a5bdc19ebb..25c7b2c788 100644 --- a/ui/vnc-enc-tight.c +++ b/ui/vnc-enc-tight.c @@ -1001,16 +1001,24 @@ static int send_mono_rect(VncState *vs, int x, int y, break; } case 2: - vnc_write(vs, &bg, 2); - vnc_write(vs, &fg, 2); + { + uint16_t bg16 = bg; + uint16_t fg16 = fg; + vnc_write(vs, &bg16, 2); + vnc_write(vs, &fg16, 2); tight_encode_mono_rect16(vs->tight->tight.buffer, w, h, bg, fg); break; + } default: - vnc_write_u8(vs, bg); - vnc_write_u8(vs, fg); + { + uint8_t bg8 = bg; + uint8_t fg8 = fg; + vnc_write_u8(vs, bg8); + vnc_write_u8(vs, fg8); tight_encode_mono_rect8(vs->tight->tight.buffer, w, h, bg, fg); break; } + } vs->tight->tight.offset = bytes; bytes = tight_compress_data(vs, stream, bytes, level, Z_DEFAULT_STRATEGY); From 1702044371d334bfe4e3ffd96378a419bae4f4f7 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 29 May 2025 17:45:10 +0100 Subject: [PATCH 074/136] hw/arm: Add missing psci_conduit to NPCM8XX SoC boot info Without psci_conduit, the Linux kernel crashes almost immediately. psci: probing for conduit method from DT. 
Internal error: Oops - Undefined instruction: 0000000002000000 [#1] PREEMPT SMP Fixes: ae0c4d1a1290 ("hw/arm: Add NPCM8XX SoC") Cc: qemu-stable@nongnu.org Cc: Hao Wu Cc: Peter Maydell Signed-off-by: Guenter Roeck Message-id: 20250315142050.3642741-1-linux@roeck-us.net Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell (cherry picked from commit e6bc01777e5a4b6ecf3414b21a2d7b4846bf4817) Signed-off-by: Michael Tokarev --- hw/arm/npcm8xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/arm/npcm8xx.c b/hw/arm/npcm8xx.c index f182accc47..e5a1929ed7 100644 --- a/hw/arm/npcm8xx.c +++ b/hw/arm/npcm8xx.c @@ -346,6 +346,7 @@ static struct arm_boot_info npcm8xx_binfo = { .secure_boot = false, .board_id = -1, .board_setup_addr = NPCM8XX_BOARD_SETUP_ADDR, + .psci_conduit = QEMU_PSCI_CONDUIT_SMC, }; void npcm8xx_load_kernel(MachineState *machine, NPCM8xxState *soc) From 21f86e29510a7d8f6daca523ef4d630854c70b4f Mon Sep 17 00:00:00 2001 From: Huaitong Han Date: Thu, 22 May 2025 18:05:48 +0800 Subject: [PATCH 075/136] vhost: Don't set vring call if guest notifier is unused The vring call fd is set even when the guest does not use MSI-X (e.g., in the case of virtio PMD), leading to unnecessary CPU overhead for processing interrupts. The commit 96a3d98d2c("vhost: don't set vring call if no vector") optimized the case where MSI-X is enabled but the queue vector is unset. However, there's an additional case where the guest uses INTx and the INTx_DISABLED bit in the PCI config is set, meaning that no interrupt notifier will actually be used. In such cases, the vring call fd should also be cleared to avoid redundant interrupt handling. Fixes: 96a3d98d2c("vhost: don't set vring call if no vector") Reported-by: Zhiyuan Yuan Signed-off-by: Jidong Xia Signed-off-by: Huaitong Han Message-Id: <20250522100548.212740-1-hanht2@chinatelecom.cn> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin (cherry picked from commit a9403bfcd93025df7b1924d0cf34fbc408955b33) Signed-off-by: Michael Tokarev --- hw/pci/pci.c | 2 +- hw/virtio/virtio-pci.c | 7 ++++++- include/hw/pci/pci.h | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 2844ec5556..503a897528 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1719,7 +1719,7 @@ static void pci_update_mappings(PCIDevice *d) pci_update_vga(d); } -static inline int pci_irq_disabled(PCIDevice *d) +int pci_irq_disabled(PCIDevice *d) { return pci_get_word(d->config + PCI_COMMAND) & PCI_COMMAND_INTX_DISABLE; } diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 3ca3f849d3..e60ad843fc 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1215,7 +1215,12 @@ static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign, static bool virtio_pci_query_guest_notifiers(DeviceState *d) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - return msix_enabled(&proxy->pci_dev); + + if (msix_enabled(&proxy->pci_dev)) { + return true; + } else { + return pci_irq_disabled(&proxy->pci_dev); + } } static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 822fbacdf0..7e382552b9 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -668,6 +668,7 @@ void lsi53c8xx_handle_legacy_cmdline(DeviceState *lsi_dev); qemu_irq pci_allocate_irq(PCIDevice *pci_dev); void pci_set_irq(PCIDevice *pci_dev, int level); +int pci_irq_disabled(PCIDevice *d); static inline void pci_irq_assert(PCIDevice *pci_dev) { From 325d039d25f3626c0a52075e1d84e504f13e54c2 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Mon, 26 May 2025 22:38:20 +0200 Subject: [PATCH 076/136] hw/i386/pc_piix: Fix RTC ISA IRQ wiring of isapc machine Commit 56b1f50e3c10 ("hw/i386/pc: Wire RTC ISA IRQs in south bridges") attempted to refactor RTC IRQ wiring which was previously done in 
pc_basic_device_init() but forgot about the isapc machine. Fix this by wiring in the code section dedicated exclusively to the isapc machine. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2961 Fixes: 56b1f50e3c10 ("hw/i386/pc: Wire RTC ISA IRQs in south bridges") cc: qemu-stable Signed-off-by: Bernhard Beschow Reviewed-by: Mark Cave-Ayland Message-Id: <20250526203820.1853-1-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin (cherry picked from commit 0b006153b7ec66505cb2d231235aa19ca5d2ce37) Signed-off-by: Michael Tokarev --- hw/i386/pc_piix.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 6c91e2d292..7cfa142b11 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -285,6 +285,8 @@ static void pc_init1(MachineState *machine, const char *pci_type) pcms->idebus[0] = qdev_get_child_bus(dev, "ide.0"); pcms->idebus[1] = qdev_get_child_bus(dev, "ide.1"); } else { + uint32_t irq; + isa_bus = isa_bus_new(NULL, system_memory, system_io, &error_abort); isa_bus_register_input_irqs(isa_bus, x86ms->gsi); @@ -292,6 +294,9 @@ static void pc_init1(MachineState *machine, const char *pci_type) x86ms->rtc = isa_new(TYPE_MC146818_RTC); qdev_prop_set_int32(DEVICE(x86ms->rtc), "base_year", 2000); isa_realize_and_unref(x86ms->rtc, isa_bus, &error_fatal); + irq = object_property_get_uint(OBJECT(x86ms->rtc), "irq", + &error_fatal); + isa_connect_gpio_out(ISA_DEVICE(x86ms->rtc), 0, irq); i8257_dma_init(OBJECT(machine), isa_bus, 0); pcms->hpet_enabled = false; From 8399ccb48041be54cec39b0c08494ea01ca84e4c Mon Sep 17 00:00:00 2001 From: Sairaj Kodilkar Date: Fri, 16 May 2025 15:35:34 +0530 Subject: [PATCH 077/136] hw/i386/amd_iommu: Fix device setup failure when PT is on. Commit c1f46999ef506 ("amd_iommu: Add support for pass though mode") introduces the support for "pt" flag by enabling nodma memory when "pt=off". 
This allowed VFIO devices to successfully register notifiers by using nodma region. But, This also broke things when guest is booted with the iommu=nopt because, devices bypass the IOMMU and use untranslated addresses (IOVA) to perform DMA reads/writes to the nodma memory region, ultimately resulting in a failure to setup the devices in the guest. Fix the above issue by always enabling the amdvi_dev_as->iommu memory region. But this will once again cause VFIO devices to fail while registering the notifiers with AMD IOMMU memory region. Fixes: c1f46999ef506 ("amd_iommu: Add support for pass though mode") Signed-off-by: Sairaj Kodilkar Reviewed-by: Vasant Hegde Message-Id: <20250516100535.4980-2-sarunkod@amd.com> Fixes: c1f46999ef506 ("amd_iommu: Add support for pass though mode") Signed-off-by: Sairaj Kodilkar Reviewed-by: Vasant Hegde (cherry picked from commit 31753d5a336fbb4e9246397f4b90b6f611f27f22) Signed-off-by: Michael Tokarev --- hw/i386/amd_iommu.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 5f9b952799..df8ba5d39a 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1426,7 +1426,6 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) AMDVIState *s = opaque; AMDVIAddressSpace **iommu_as, *amdvi_dev_as; int bus_num = pci_bus_num(bus); - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); iommu_as = s->address_spaces[bus_num]; @@ -1486,15 +1485,8 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) AMDVI_INT_ADDR_FIRST, &amdvi_dev_as->iommu_ir, 1); - if (!x86_iommu->pt_supported) { - memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false); - memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu), - true); - } else { - memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu), - false); - memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, true); - } + 
memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false); + memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu), true); } return &iommu_as[devfn]->as; } From c3e1181bafaecbcf9deae6a8bb4ad3586e221a3d Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Fri, 16 May 2025 15:35:35 +0530 Subject: [PATCH 078/136] hw/i386/amd_iommu: Fix xtsup when vcpus < 255 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If vCPUs > 255 then x86 common code (x86_cpus_init()) call kvm_enable_x2apic(). But if vCPUs <= 255 then the common code won't calls kvm_enable_x2apic(). This is because commit 8c6619f3e692 ("hw/i386/amd_iommu: Simplify non-KVM checks on XTSup feature") removed the call to kvm_enable_x2apic when xtsup is "on", which break things when guest is booted with x2apic mode and there are <= 255 vCPUs. Fix this by adding back kvm_enable_x2apic() call when xtsup=on. Fixes: 8c6619f3e692 ("hw/i386/amd_iommu: Simplify non-KVM checks on XTSup feature") Reported-by: Alejandro Jimenez Tested-by: Tested-by: Alejandro Jimenez Cc: Philippe Mathieu-Daudé Cc: Joao Martins Signed-off-by: Vasant Hegde Signed-off-by: Sairaj Kodilkar Message-Id: <20250516100535.4980-3-sarunkod@amd.com> Fixes: 8c6619f3e692 ("hw/i386/amd_iommu: Simplify non-KVM checks on XTSup feature") Reported-by: Alejandro Jimenez Tested-by: Tested-by: Alejandro Jimenez Cc: Philippe Mathieu-Daudé Cc: Joao Martins Signed-off-by: Vasant Hegde Signed-off-by: Sairaj Kodilkar (cherry picked from commit 0f178860df3489a9d3c19a5f7f024e6aa6c26515) Signed-off-by: Michael Tokarev --- hw/i386/amd_iommu.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index df8ba5d39a..af85706b8a 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1649,6 +1649,14 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) exit(EXIT_FAILURE); } + if (s->xtsup) { + if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) { + 
error_report("AMD IOMMU xtsup=on requires x2APIC support on " + "the KVM side"); + exit(EXIT_FAILURE); + } + } + pci_setup_iommu(bus, &amdvi_iommu_ops, s); amdvi_init(s); } From 47ddae806b9272959965f05b72187bdc933c7d96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Volker=20R=C3=BCmelin?= Date: Thu, 15 May 2025 07:44:24 +0200 Subject: [PATCH 079/136] audio: fix SIGSEGV in AUD_get_buffer_size_out() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As far as the emulated audio devices are concerned the pointer returned by AUD_open_out() is an opaque handle. This includes the NULL pointer. In this case, AUD_get_buffer_size_out() should return a sensible buffer size instead of triggering a segmentation fault. All other public AUD_*_out() and audio_*_out() functions handle this case. Reviewed-by: Marc-André Lureau Signed-off-by: Volker Rümelin Message-Id: <20250515054429.7385-2-vr_qemu@t-online.de> (cherry picked from commit 5ddd6c8dc849b4af44bd06840c9133d64e62c27c) Signed-off-by: Michael Tokarev --- audio/audio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/audio/audio.c b/audio/audio.c index 41ee11aaad..70ef22b1a4 100644 --- a/audio/audio.c +++ b/audio/audio.c @@ -905,6 +905,10 @@ size_t AUD_read(SWVoiceIn *sw, void *buf, size_t size) int AUD_get_buffer_size_out(SWVoiceOut *sw) { + if (!sw) { + return 0; + } + return sw->hw->samples * sw->hw->info.bytes_per_frame; } From 6b12cb6edc09fa28429e72af12517e45b6c7c734 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Volker=20R=C3=BCmelin?= Date: Thu, 15 May 2025 07:44:25 +0200 Subject: [PATCH 080/136] audio: fix size calculation in AUD_get_buffer_size_out() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The buffer size calculated by AUD_get_buffer_size_out() is often incorrect. sw->hw->samples * sw->hw->info.bytes_per_frame is the size of the mixing engine buffer in audio frames multiplied by the size of one frame of the audio backend. 
Due to resampling or format conversion, the size of the frontend buffer can differ significantly. Return the correct buffer size when the mixing engine is used. Reviewed-by: Marc-André Lureau Signed-off-by: Volker Rümelin Message-Id: <20250515054429.7385-3-vr_qemu@t-online.de> (cherry picked from commit ccb4fec0e5f233cb61a83b3af59ae11716ea06c0) Signed-off-by: Michael Tokarev --- audio/audio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/audio/audio.c b/audio/audio.c index 70ef22b1a4..3f5baf0cc6 100644 --- a/audio/audio.c +++ b/audio/audio.c @@ -909,6 +909,10 @@ int AUD_get_buffer_size_out(SWVoiceOut *sw) return 0; } + if (audio_get_pdo_out(sw->s->dev)->mixing_engine) { + return sw->resample_buf.size * sw->info.bytes_per_frame; + } + return sw->hw->samples * sw->hw->info.bytes_per_frame; } From 92d08b3c0d19467e2bb6ea079aba82c1e6ead9f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Volker=20R=C3=BCmelin?= Date: Thu, 15 May 2025 07:44:26 +0200 Subject: [PATCH 081/136] hw/audio/asc: fix SIGSEGV in asc_realize() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AUD_open_out() may fail and return NULL. This may then lead to a segmentation fault in memset() below. The memset() behaviour is undefined if the pointer to the destination object is a null pointer. Add the missing error handling code. 
Reviewed-by: Marc-André Lureau Signed-off-by: Volker Rümelin Reviewed-by: Mark Cave-Ayland Message-Id: <20250515054429.7385-4-vr_qemu@t-online.de> (cherry picked from commit d009f26a54f573468be721590a19350c224bc730) Signed-off-by: Michael Tokarev --- hw/audio/asc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/hw/audio/asc.c b/hw/audio/asc.c index cc205bf063..b7d0fd8acd 100644 --- a/hw/audio/asc.c +++ b/hw/audio/asc.c @@ -12,6 +12,7 @@ #include "qemu/osdep.h" #include "qemu/timer.h" +#include "qapi/error.h" #include "hw/sysbus.h" #include "hw/irq.h" #include "audio/audio.h" @@ -654,6 +655,12 @@ static void asc_realize(DeviceState *dev, Error **errp) s->voice = AUD_open_out(&s->card, s->voice, "asc.out", s, asc_out_cb, &as); + if (!s->voice) { + AUD_remove_card(&s->card); + error_setg(errp, "Initializing audio stream failed"); + return; + } + s->shift = 1; s->samples = AUD_get_buffer_size_out(s->voice) >> s->shift; s->mixbuf = g_malloc0(s->samples << s->shift); From 9f80e4707cb05ab356c69ae8fde8537ef0b5810d Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Fri, 3 Jan 2025 00:48:25 -0800 Subject: [PATCH 082/136] target/i386: Remove FRED dependency on WRMSRNS WRMSRNS doesn't become a required feature for FRED, and Linux has removed the dependency, as such remove it from Qemu.
Cc: qemu-stable@nongnu.org Signed-off-by: Xin Li (Intel) Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20250103084827.1820007-2-xin@zytor.com Signed-off-by: Paolo Bonzini (cherry picked from commit 0b901459a87a7fdbed36e574aae33e0635a3e9af) Signed-off-by: Michael Tokarev --- target/i386/cpu.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 5e12cba1b8..2c9517f56d 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1774,10 +1774,6 @@ static FeatureDep feature_dependencies[] = { .from = { FEAT_7_1_EAX, CPUID_7_1_EAX_LKGS }, .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED }, }, - { - .from = { FEAT_7_1_EAX, CPUID_7_1_EAX_WRMSRNS }, - .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED }, - }, { .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_SGX }, .to = { FEAT_7_0_ECX, CPUID_7_0_ECX_SGX_LC }, From d8e6f3f885cc9e72a7c298bd7fb830b6f48051cf Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 29 May 2025 16:31:47 -0400 Subject: [PATCH 083/136] iotests: fix 240 Commit 2e8e18c2e463 ("virtio-scsi: add iothread-vq-mapping parameter") removed the limitation that virtio-scsi devices must successfully set the AioContext on their BlockBackends. This was made possible thanks to the QEMU multi-queue block layer. This change broke qemu-iotests 240, which checks that adding a virtio-scsi device with a drive that is already in another AioContext will fail. Update the test to take the relaxed behavior into account. I considered removing this test case entirely, but the code coverage still seems valuable. 
Fixes: 2e8e18c2e463 ("virtio-scsi: add iothread-vq-mapping parameter") Reported-by: Thomas Huth Signed-off-by: Stefan Hajnoczi Reviewed-by: Eric Blake Tested-by: Eric Blake Message-ID: <20250529203147.180338-1-stefanha@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf (cherry picked from commit 2e887187454e57d04522099d4f04d17137d6e05c) Signed-off-by: Michael Tokarev --- tests/qemu-iotests/240 | 2 -- tests/qemu-iotests/240.out | 4 +--- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/qemu-iotests/240 b/tests/qemu-iotests/240 index 9b281e1dc0..f8af9ff648 100755 --- a/tests/qemu-iotests/240 +++ b/tests/qemu-iotests/240 @@ -81,8 +81,6 @@ class TestCase(iotests.QMPTestCase): self.vm.qmp_log('device_del', id='scsi-hd0') self.vm.event_wait('DEVICE_DELETED') - self.vm.qmp_log('device_add', id='scsi-hd1', driver='scsi-hd', drive='hd0', bus="scsi1.0") - self.vm.qmp_log('device_del', id='scsi-hd1') self.vm.event_wait('DEVICE_DELETED') self.vm.qmp_log('blockdev-del', node_name='hd0') diff --git a/tests/qemu-iotests/240.out b/tests/qemu-iotests/240.out index 89ed25e506..10dcc42e06 100644 --- a/tests/qemu-iotests/240.out +++ b/tests/qemu-iotests/240.out @@ -46,10 +46,8 @@ {"execute": "device_add", "arguments": {"bus": "scsi0.0", "drive": "hd0", "driver": "scsi-hd", "id": "scsi-hd0"}} {"return": {}} {"execute": "device_add", "arguments": {"bus": "scsi1.0", "drive": "hd0", "driver": "scsi-hd", "id": "scsi-hd1"}} -{"error": {"class": "GenericError", "desc": "Cannot change iothread of active block backend"}} -{"execute": "device_del", "arguments": {"id": "scsi-hd0"}} {"return": {}} -{"execute": "device_add", "arguments": {"bus": "scsi1.0", "drive": "hd0", "driver": "scsi-hd", "id": "scsi-hd1"}} +{"execute": "device_del", "arguments": {"id": "scsi-hd0"}} {"return": {}} {"execute": "device_del", "arguments": {"id": "scsi-hd1"}} {"return": {}} From ab96ea4b4d257919f5d0042eedcb1c3cfa1d0a0c Mon Sep 17 00:00:00 2001 From: Fiona Ebner Date: Fri, 23 May 2025 
09:02:11 +0200 Subject: [PATCH 084/136] hw/core/qdev-properties-system: Add missing return in set_drive_helper() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, changing the 'drive' property of e.g. a scsi-hd object will result in an assertion failure if the aio context of the block node it's replaced with doesn't match the current aio context: > bdrv_replace_child_noperm: Assertion `bdrv_get_aio_context(old_bs) == > bdrv_get_aio_context(new_bs)' failed. The problematic scenario is already detected, but a 'return' statement was missing. Cc: qemu-stable@nongnu.org Fixes: d1a58c176a ("qdev: allow setting drive property for realized device") Signed-off-by: Fiona Ebner Message-ID: <20250523070211.280498-1-f.ebner@proxmox.com> Reviewed-by: Daniel P. Berrangé Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf (cherry picked from commit eef2dd03f948a512499775043bdc0c5c88d8a2dd) Signed-off-by: Michael Tokarev --- hw/core/qdev-properties-system.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index a7dde73c29..6b73127123 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -145,6 +145,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name, if (ctx != bdrv_get_aio_context(bs)) { error_setg(errp, "Different aio context is not supported for new " "node"); + return; } blk_replace_bs(blk, bs, errp); From 6276ce6d702bd9a7390942bbc29acaaf03b53c4e Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Wed, 4 Jun 2025 14:55:01 +0800 Subject: [PATCH 085/136] hw/loongarch/virt: Fix big endian support with MCFG table With API build_mcfg(), it is not necessary with parameter structure AcpiMcfgInfo to convert to little endian since it is directly used with host native endian. Here remove endian conversion before calling function build_mcfg(). 
With this patch, bios-tables-test passes to run on big endian host machine S390. Fixes: 735143f10d3e ("hw/loongarch: Add acpi ged support") Cc: qemu-stable@nongnu.org Signed-off-by: Bibo Mao Reviewed-by: Song Gao Message-Id: <20250604065502.1114098-2-maobibo@loongson.cn> Signed-off-by: Song Gao (cherry picked from commit 9c55c03c05c1899521ff0c991b9296633d759890) Signed-off-by: Michael Tokarev --- hw/loongarch/virt-acpi-build.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/loongarch/virt-acpi-build.c b/hw/loongarch/virt-acpi-build.c index fced6c445a..24ccb580bd 100644 --- a/hw/loongarch/virt-acpi-build.c +++ b/hw/loongarch/virt-acpi-build.c @@ -575,8 +575,8 @@ static void acpi_build(AcpiBuildTables *tables, MachineState *machine) acpi_add_table(table_offsets, tables_blob); { AcpiMcfgInfo mcfg = { - .base = cpu_to_le64(VIRT_PCI_CFG_BASE), - .size = cpu_to_le64(VIRT_PCI_CFG_SIZE), + .base = VIRT_PCI_CFG_BASE, + .size = VIRT_PCI_CFG_SIZE, }; build_mcfg(tables_blob, tables->linker, &mcfg, lvms->oem_id, lvms->oem_table_id); From c902fc66c9508c6fa2ec17510f5ae9241c0859fb Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Mon, 2 Jun 2025 12:46:55 +0100 Subject: [PATCH 086/136] hw/arm/virt: Check bypass iommu is not set for iommu-map DT property default_bus_bypass_iommu tells us whether the bypass_iommu is set for the default PCIe root bus. Make sure we check that before adding the "iommu-map" DT property. 
Cc: qemu-stable@nongnu.org Fixes: 6d7a85483a06 ("hw/arm/virt: Add default_bus_bypass_iommu machine option") Suggested-by: Eric Auger Signed-off-by: Shameer Kolothum Reviewed-by: Donald Dutile Reviewed-by: Eric Auger Message-id: 20250602114655.42920-1-shameerali.kolothum.thodi@huawei.com Signed-off-by: Peter Maydell (cherry picked from commit f5ec751ee70d7960a97c6c675f69e924d82dc60d) Signed-off-by: Michael Tokarev --- hw/arm/virt.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index a96452f17a..0e78616aac 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1492,9 +1492,12 @@ static void create_virtio_iommu_dt_bindings(VirtMachineState *vms) qemu_fdt_setprop_cell(ms->fdt, node, "phandle", vms->iommu_phandle); g_free(node); - qemu_fdt_setprop_cells(ms->fdt, vms->pciehb_nodename, "iommu-map", - 0x0, vms->iommu_phandle, 0x0, bdf, - bdf + 1, vms->iommu_phandle, bdf + 1, 0xffff - bdf); + if (!vms->default_bus_bypass_iommu) { + qemu_fdt_setprop_cells(ms->fdt, vms->pciehb_nodename, "iommu-map", + 0x0, vms->iommu_phandle, 0x0, bdf, + bdf + 1, vms->iommu_phandle, bdf + 1, + 0xffff - bdf); + } } static void create_pcie(VirtMachineState *vms) @@ -1617,8 +1620,10 @@ static void create_pcie(VirtMachineState *vms) switch (vms->iommu) { case VIRT_IOMMU_SMMUV3: create_smmu(vms, vms->bus); - qemu_fdt_setprop_cells(ms->fdt, nodename, "iommu-map", - 0x0, vms->iommu_phandle, 0x0, 0x10000); + if (!vms->default_bus_bypass_iommu) { + qemu_fdt_setprop_cells(ms->fdt, nodename, "iommu-map", + 0x0, vms->iommu_phandle, 0x0, 0x10000); + } break; default: g_assert_not_reached(); From 436b3dd8cc8e8fe67d52716c51781fb0149dd0b2 Mon Sep 17 00:00:00 2001 From: Ethan Chen Date: Fri, 6 Jun 2025 17:57:28 +0800 Subject: [PATCH 087/136] qemu-options.hx: Fix reversed description of icount sleep behavior The documentation for the -icount option incorrectly describes the behavior of the sleep suboption. 
Based on the actual implementation and system behavior, the effects of sleep=on and sleep=off were inadvertently reversed. This commit updates the description to reflect their intended functionality. Cc: qemu-stable@nongnu.org Fixes: fa647905e6ba ("qemu-options.hx: Fix minor issues in icount documentation") Signed-off-by: Ethan Chen Message-id: 20250606095728.3672832-1-ethan84@andestech.com Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell (cherry picked from commit e372214e663a4370fe064f7867f402eade37357e) Signed-off-by: Michael Tokarev --- qemu-options.hx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/qemu-options.hx b/qemu-options.hx index dc694a99a3..396eea7ef2 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -4936,13 +4936,13 @@ SRST with actual performance. When the virtual cpu is sleeping, the virtual time will advance at - default speed unless ``sleep=on`` is specified. With - ``sleep=on``, the virtual time will jump to the next timer + default speed unless ``sleep=off`` is specified. With + ``sleep=off``, the virtual time will jump to the next timer deadline instantly whenever the virtual cpu goes to sleep mode and will not advance if no timer is enabled. This behavior gives deterministic execution times from the guest point of view. - The default if icount is enabled is ``sleep=off``. - ``sleep=on`` cannot be used together with either ``shift=auto`` + The default if icount is enabled is ``sleep=on``. + ``sleep=off`` cannot be used together with either ``shift=auto`` or ``align=on``. 
``align=on`` will activate the delay algorithm which will try to From b7da1f5f5ddd196c7e992b6b7b746c5b54ae1d5a Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 5 Jun 2025 15:18:01 +0100 Subject: [PATCH 088/136] hw/arm/mps2: Configure the AN500 CPU with 16 MPU regions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The AN500 application note documents that it configures the Cortex-M7 CPU to have 16 MPU regions. We weren't doing this in our emulation, so the CPU had only the default 8 MPU regions. Set the mpu-ns-regions property to 16 for this board. This bug doesn't affect any of the other board types we model in this source file, because they all use either the Cortex-M3 or Cortex-M4. Those CPUs do not have an RTL configurable number of MPU regions, and always provide 8 regions if the MPU is built in. Cc: qemu-stable@nongnu.org Reported-by: Corentin GENDRE Signed-off-by: Peter Maydell Reviewed-by: Alex Bennée Message-id: 20250605141801.1083266-1-peter.maydell@linaro.org (cherry picked from commit cd38e638c43e4d5d3fd65dd4529c2e6153c9c408) Signed-off-by: Michael Tokarev --- hw/arm/mps2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/arm/mps2.c b/hw/arm/mps2.c index 3f8db0cab6..313c401eb8 100644 --- a/hw/arm/mps2.c +++ b/hw/arm/mps2.c @@ -224,7 +224,11 @@ static void mps2_common_init(MachineState *machine) switch (mmc->fpga_type) { case FPGA_AN385: case FPGA_AN386: + qdev_prop_set_uint32(armv7m, "num-irq", 32); + break; case FPGA_AN500: + /* The AN500 configures its Cortex-M7 with 16 MPU regions */ + qdev_prop_set_uint32(armv7m, "mpu-ns-regions", 16); qdev_prop_set_uint32(armv7m, "num-irq", 32); break; case FPGA_AN511: From 3cf25f4c71086ec96df7e3b0db7f093a684e6fda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2E=20Neusch=C3=A4fer?= Date: Fri, 13 Jun 2025 17:59:32 +0200 Subject: [PATCH 089/136] linux-user/arm: Fix return value of SYS_cacheflush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Although the emulated cacheflush syscall does nothing, it still needs to return zero to indicate success. Cc: qemu-stable@nongnu.org Signed-off-by: J. Neuschäfer Message-id: 20250613-cache-v1-1-ee9f4a9ba81b@gmx.net Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell (cherry picked from commit 5ad2b1f443a96444cf3e7a2fbe17aae696201012) Signed-off-by: Michael Tokarev --- linux-user/arm/cpu_loop.c | 1 + 1 file changed, 1 insertion(+) diff --git a/linux-user/arm/cpu_loop.c b/linux-user/arm/cpu_loop.c index 7416e3216e..098b54d10e 100644 --- a/linux-user/arm/cpu_loop.c +++ b/linux-user/arm/cpu_loop.c @@ -362,6 +362,7 @@ void cpu_loop(CPUARMState *env) switch (n) { case ARM_NR_cacheflush: /* nop */ + env->regs[0] = 0; break; case ARM_NR_set_tls: cpu_set_tls(env, env->regs[0]); From fdd20285ae19cf26c0d8d561684fb8126fa48f22 Mon Sep 17 00:00:00 2001 From: Song Gao Date: Tue, 3 Jun 2025 10:48:09 +0800 Subject: [PATCH 090/136] target/loongarch: add check for fcond fcond only has 22 types, add a check for fcond. 
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2972 Signed-off-by: Song Gao Reviewed-by: Richard Henderson Message-Id: <20250603024810.350510-1-gaosong@loongson.cn> (cherry picked from commit e7788da9860c97920c19fa1150806186513ef256) Signed-off-by: Michael Tokarev --- .../loongarch/tcg/insn_trans/trans_fcmp.c.inc | 25 +++++++++++++------ .../loongarch/tcg/insn_trans/trans_vec.c.inc | 16 +++++++++--- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc b/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc index 3babf69e4a..6a2c030a6b 100644 --- a/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc @@ -4,10 +4,15 @@ */ /* bit0(signaling/quiet) bit1(lt) bit2(eq) bit3(un) bit4(neq) */ -static uint32_t get_fcmp_flags(int cond) +static uint32_t get_fcmp_flags(DisasContext *ctx, int cond) { uint32_t flags = 0; + /*check cond , cond =[0-8,10,12] */ + if ((cond > 8) &&(cond != 10) && (cond != 12)) { + return -1; + } + if (cond & 0x1) { flags |= FCMP_LT; } @@ -26,9 +31,14 @@ static uint32_t get_fcmp_flags(int cond) static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a) { TCGv var, src1, src2; - uint32_t flags; + uint32_t flags = get_fcmp_flags(ctx, a->fcond >>1); void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32); + if (flags == -1) { + generate_exception(ctx, EXCCODE_INE); + return true; + } + if (!avail_FP_SP(ctx)) { return false; } @@ -39,8 +49,6 @@ static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a) src1 = get_fpr(ctx, a->fj); src2 = get_fpr(ctx, a->fk); fn = (a->fcond & 1 ? 
gen_helper_fcmp_s_s : gen_helper_fcmp_c_s); - flags = get_fcmp_flags(a->fcond >> 1); - fn(var, tcg_env, src1, src2, tcg_constant_i32(flags)); tcg_gen_st8_tl(var, tcg_env, offsetof(CPULoongArchState, cf[a->cd])); @@ -50,9 +58,14 @@ static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a) static bool trans_fcmp_cond_d(DisasContext *ctx, arg_fcmp_cond_d *a) { TCGv var, src1, src2; - uint32_t flags; + uint32_t flags = get_fcmp_flags(ctx, a->fcond >> 1); void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32); + if (flags == -1) { + generate_exception(ctx, EXCCODE_INE); + return true; + } + if (!avail_FP_DP(ctx)) { return false; } @@ -63,8 +76,6 @@ static bool trans_fcmp_cond_d(DisasContext *ctx, arg_fcmp_cond_d *a) src1 = get_fpr(ctx, a->fj); src2 = get_fpr(ctx, a->fk); fn = (a->fcond & 1 ? gen_helper_fcmp_s_d : gen_helper_fcmp_c_d); - flags = get_fcmp_flags(a->fcond >> 1); - fn(var, tcg_env, src1, src2, tcg_constant_i32(flags)); tcg_gen_st8_tl(var, tcg_env, offsetof(CPULoongArchState, cf[a->cd])); diff --git a/target/loongarch/tcg/insn_trans/trans_vec.c.inc b/target/loongarch/tcg/insn_trans/trans_vec.c.inc index dff92772ad..d6f0560349 100644 --- a/target/loongarch/tcg/insn_trans/trans_vec.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_vec.c.inc @@ -4655,19 +4655,23 @@ TRANS(xvslti_du, LASX, do_xcmpi, MO_64, TCG_COND_LTU) static bool do_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz) { - uint32_t flags; + uint32_t flags = get_fcmp_flags(ctx, a->fcond >> 1); void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); TCGv_i32 vd = tcg_constant_i32(a->vd); TCGv_i32 vj = tcg_constant_i32(a->vj); TCGv_i32 vk = tcg_constant_i32(a->vk); TCGv_i32 oprsz = tcg_constant_i32(sz); + if(flags == -1){ + generate_exception(ctx, EXCCODE_INE); + return true; + } + if (!check_vec(ctx, sz)) { return true; } fn = (a->fcond & 1 ? 
gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s); - flags = get_fcmp_flags(a->fcond >> 1); fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags)); return true; @@ -4675,19 +4679,23 @@ static bool do_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz) static bool do_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz) { - uint32_t flags; + uint32_t flags = get_fcmp_flags(ctx, a->fcond >> 1); void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); TCGv_i32 vd = tcg_constant_i32(a->vd); TCGv_i32 vj = tcg_constant_i32(a->vj); TCGv_i32 vk = tcg_constant_i32(a->vk); TCGv_i32 oprsz = tcg_constant_i32(sz); + if (flags == -1) { + generate_exception(ctx, EXCCODE_INE); + return true; + } + if (!check_vec(ctx, sz)) { return true; } fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d); - flags = get_fcmp_flags(a->fcond >> 1); fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags)); return true; From 46cdfdfe9202c4e58aed99bfbf7e20f514c671cd Mon Sep 17 00:00:00 2001 From: Song Gao Date: Wed, 4 Jun 2025 16:40:05 +0800 Subject: [PATCH 091/136] target/loongarch: fix vldi/xvldi raise wrong error on qemu we got an aborted error ** ERROR:../target/loongarch/tcg/insn_trans/trans_vec.c.inc:3574:vldi_get_value: code should not be reached Bail out! ERROR:../target/loongarch/tcg/insn_trans/trans_vec.c.inc:3574:vldi_get_value: code should not be reached Aborted (core dumped) but on 3A600/3A5000 we got a "Illegal instruction" error. 
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2971 Fixes: 29bb5d727ff ("target/loongarch: Implement vldi") Cc: qemu-stable@nongnu.org Reviewed-by: Bibo Mao Reviewed-by: Richard Henderson Signed-off-by: Song Gao (cherry picked from commit c2a2e1ad2a749caa864281b1d4dc3f16c3f344f6) Signed-off-by: Michael Tokarev --- target/loongarch/tcg/insn_trans/trans_vec.c.inc | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/target/loongarch/tcg/insn_trans/trans_vec.c.inc b/target/loongarch/tcg/insn_trans/trans_vec.c.inc index d6f0560349..78730029cb 100644 --- a/target/loongarch/tcg/insn_trans/trans_vec.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_vec.c.inc @@ -3465,7 +3465,7 @@ TRANS(xvmsknz_b, LASX, gen_xx, gen_helper_vmsknz_b) static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm) { int mode; - uint64_t data, t; + uint64_t data = 0, t; /* * imm bit [11:8] is mode, mode value is 0-12. @@ -3570,17 +3570,26 @@ static uint64_t vldi_get_value(DisasContext *ctx, uint32_t imm) } break; default: - generate_exception(ctx, EXCCODE_INE); g_assert_not_reached(); } return data; } +static bool check_valid_vldi_mode(arg_vldi *a) +{ + return extract32(a->imm, 8, 4) <= 12; +} + static bool gen_vldi(DisasContext *ctx, arg_vldi *a, uint32_t oprsz) { int sel, vece; uint64_t value; + if (!check_valid_vldi_mode(a)) { + generate_exception(ctx, EXCCODE_INE); + return true; + } + if (!check_vec(ctx, oprsz)) { return true; } From 22909a1397ad9679f0ffaba3016a3420ace2da21 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 28 Jun 2025 09:57:53 -0600 Subject: [PATCH 092/136] tcg: Fix constant propagation in tcg_reg_alloc_dup The scalar constant must be replicated for dup. 
Cc: qemu-stable@nongnu.org Fixes: bab1671f0fa ("tcg: Manually expand INDEX_op_dup_vec") Resolves: https://gitlab.com/qemu-project/qemu/-/issues/3002 Signed-off-by: Richard Henderson (cherry picked from commit 0d0fc3f4658937fb81fcc16a89738e83bd8d4795) Signed-off-by: Michael Tokarev --- tcg/tcg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index dfd48b8264..b1a7465df2 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -4927,7 +4927,7 @@ static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) if (its->val_type == TEMP_VAL_CONST) { /* Propagate constant via movi -> dupi. */ - tcg_target_ulong val = its->val; + tcg_target_ulong val = dup_const(vece, its->val); if (IS_DEAD_ARG(1)) { temp_dead(s, its); } From ed4bad29e7dd437f7a8168dbe04b15909d945de9 Mon Sep 17 00:00:00 2001 From: Solomon Tan Date: Tue, 1 Jul 2025 15:08:25 +0100 Subject: [PATCH 093/136] target/arm: Make RETA[AB] UNDEF when pauth is not implemented MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the Arm A-profile A64 Instruction Set Architecture, RETA[AB] should be decoded as UNDEF if the pauth feature is not implemented. We got this right in the initial implementation, but accidentally dropped the feature-check when we converted these insns to decodetree. 
Cc: qemu-stable@nongnu.org Fixes: 0ebbe9021254f ("target/arm: Convert BRA[AB]Z, BLR[AB]Z, RETA[AB] to decodetree") Signed-off-by: Solomon Tan Reviewed-by: Alex Bennée Reviewed-by: Richard Henderson Message-id: 20250616171549.59190-1-root@wjsota.com Signed-off-by: Peter Maydell (cherry picked from commit 9a3bf0e0ab628de7051b41a88c4628aa9e4d311b) Signed-off-by: Michael Tokarev --- target/arm/tcg/translate-a64.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 39014325df..f6e88eb5f7 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -1821,6 +1821,10 @@ static bool trans_RETA(DisasContext *s, arg_reta *a) { TCGv_i64 dst; + if (!dc_isar_feature(aa64_pauth, s)) { + return false; + } + dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m); gen_a64_set_pc(s, dst); s->base.is_jmp = DISAS_JUMP; From 5df7910a237ccb8ebd31f45d4216f3765c6010ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 1 Jul 2025 15:08:33 +0100 Subject: [PATCH 094/136] target/arm: Correct KVM & HVF dtb_compatible value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Linux kernel knows how to parse "arm,armv8", not "arm,arm-v8". 
See arch/arm64/boot/dts/foundation-v8.dts: https://github.com/torvalds/linux/commit/90556ca1ebdd Cc: qemu-stable@nongnu.org Fixes: 26861c7ce06 ("target-arm: Add minimal KVM AArch64 support") Fixes: 585df85efea ("hvf: arm: Implement -cpu host") Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-id: 20250623121845.7214-10-philmd@linaro.org Signed-off-by: Peter Maydell (cherry picked from commit a412575837b6a46584fba891e3706e87bd09a3e6) (Mjt: context fix in target/arm/kvm.c) Signed-off-by: Michael Tokarev --- target/arm/hvf/hvf.c | 2 +- target/arm/kvm.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c index 2439af63a0..01e26a9726 100644 --- a/target/arm/hvf/hvf.c +++ b/target/arm/hvf/hvf.c @@ -878,7 +878,7 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) hv_vcpu_exit_t *exit; int i; - ahcf->dtb_compatible = "arm,arm-v8"; + ahcf->dtb_compatible = "arm,armv8"; ahcf->features = (1ULL << ARM_FEATURE_V8) | (1ULL << ARM_FEATURE_NEON) | (1ULL << ARM_FEATURE_AARCH64) | diff --git a/target/arm/kvm.c b/target/arm/kvm.c index da30bdbb23..e1b87116db 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -305,7 +305,7 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) } ahcf->target = init.target; - ahcf->dtb_compatible = "arm,arm-v8"; + ahcf->dtb_compatible = "arm,armv8"; err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0, ARM64_SYS_REG(3, 0, 0, 4, 0)); From fb9bad329cadba1f09642bb20a5ed2ce709b9dc3 Mon Sep 17 00:00:00 2001 From: Yiwei Zhang Date: Fri, 27 Jun 2025 12:25:11 +0100 Subject: [PATCH 095/136] virtio-gpu: support context init multiple timeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Venus and later native contexts have their own fence context along with multiple timelines within. 
Fences with VIRTIO_GPU_FLAG_INFO_RING_IDX in the flags must be dispatched to be created on the target context. Fence signaling also has to be handled on the specific timeline within that target context. Before this change, venus fencing is completely broken if the host driver doesn't support implicit fencing with external memory objects. Frames can go backwards along with random artifacts on screen if the host driver doesn't attach an implicit fence to the render target. The symptom could be hidden by certain guest wsi backend that waits on a venus native VkFence object for the actual payload with limited present modes or under special configs. e.g. x11 mailbox or xwayland. After this change, everything related to venus fencing starts making sense. Confirmed this via guest and host side perfetto tracing. Cc: qemu-stable@nongnu.org Fixes: 94d0ea1c1928 ("virtio-gpu: Support Venus context") Signed-off-by: Yiwei Zhang Reviewed-by: Dmitry Osipenko Message-Id: <20250518152651.334115-1-zzyiwei@gmail.com> [AJB: remove version history from commit message] Tested-by: Dmitry Osipenko Signed-off-by: Alex Bennée Reviewed-by: Akihiko Odaki Message-ID: <20250627112512.1880708-16-alex.bennee@linaro.org> (cherry picked from commit 1fa2ffdbec55d84326e22f046bc3e26322836f5a) Signed-off-by: Michael Tokarev --- hw/display/virtio-gpu-virgl.c | 44 +++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c index 145a0b3879..94ddc01f91 100644 --- a/hw/display/virtio-gpu-virgl.c +++ b/hw/display/virtio-gpu-virgl.c @@ -970,6 +970,15 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g, } trace_virtio_gpu_fence_ctrl(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type); +#if VIRGL_VERSION_MAJOR >= 1 + if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX) { + virgl_renderer_context_create_fence(cmd->cmd_hdr.ctx_id, + VIRGL_RENDERER_FENCE_FLAG_MERGEABLE, + cmd->cmd_hdr.ring_idx, + cmd->cmd_hdr.fence_id); + return; + }
+#endif virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type); } @@ -983,6 +992,11 @@ static void virgl_write_fence(void *opaque, uint32_t fence) * the guest can end up emitting fences out of order * so we should check all fenced cmds not just the first one. */ +#if VIRGL_VERSION_MAJOR >= 1 + if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX) { + continue; + } +#endif if (cmd->cmd_hdr.fence_id > fence) { continue; } @@ -997,6 +1011,29 @@ static void virgl_write_fence(void *opaque, uint32_t fence) } } +#if VIRGL_VERSION_MAJOR >= 1 +static void virgl_write_context_fence(void *opaque, uint32_t ctx_id, + uint32_t ring_idx, uint64_t fence_id) { + VirtIOGPU *g = opaque; + struct virtio_gpu_ctrl_command *cmd, *tmp; + + QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) { + if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX && + cmd->cmd_hdr.ctx_id == ctx_id && cmd->cmd_hdr.ring_idx == ring_idx && + cmd->cmd_hdr.fence_id <= fence_id) { + trace_virtio_gpu_fence_resp(cmd->cmd_hdr.fence_id); + virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA); + QTAILQ_REMOVE(&g->fenceq, cmd, next); + g_free(cmd); + g->inflight--; + if (virtio_gpu_stats_enabled(g->parent_obj.conf)) { + trace_virtio_gpu_dec_inflight_fences(g->inflight); + } + } + } +} +#endif + static virgl_renderer_gl_context virgl_create_context(void *opaque, int scanout_idx, struct virgl_renderer_gl_ctx_param *params) @@ -1031,11 +1068,18 @@ static int virgl_make_context_current(void *opaque, int scanout_idx, } static struct virgl_renderer_callbacks virtio_gpu_3d_cbs = { +#if VIRGL_VERSION_MAJOR >= 1 + .version = 3, +#else .version = 1, +#endif .write_fence = virgl_write_fence, .create_gl_context = virgl_create_context, .destroy_gl_context = virgl_destroy_context, .make_current = virgl_make_context_current, +#if VIRGL_VERSION_MAJOR >= 1 + .write_context_fence = virgl_write_context_fence, +#endif }; static void virtio_gpu_print_stats(void *opaque) From 
b8f48f40334cb54df36e796b1e825a9061e0fbce Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 25 Jun 2025 10:27:51 +0200 Subject: [PATCH 096/136] hw/s390x/ccw-device: Fix memory leak in loadparm setter Commit bdf12f2a fixed the setter for the "loadparm" machine property, which gets a string from a visitor, passes it to s390_ipl_fmt_loadparm() and then forgot to free it. It left another instance of the same problem unfixed in the "loadparm" device property. Fix it. Signed-off-by: Kevin Wolf Message-ID: <20250625082751.24896-1-kwolf@redhat.com> Reviewed-by: Eric Farman Reviewed-by: Halil Pasic Tested-by: Thomas Huth Signed-off-by: Thomas Huth (cherry picked from commit 78e3781541209b3dcd6f4bb66adf3a3e504b88a4) (Mjt: bdf12f2a is 8efe1592 in stable-10.0 branch) Signed-off-by: Michael Tokarev --- hw/s390x/ccw-device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/s390x/ccw-device.c b/hw/s390x/ccw-device.c index 1ea9934f6c..a5ee9dc84d 100644 --- a/hw/s390x/ccw-device.c +++ b/hw/s390x/ccw-device.c @@ -57,7 +57,7 @@ static void ccw_device_set_loadparm(Object *obj, Visitor *v, Error **errp) { CcwDevice *dev = CCW_DEVICE(obj); - char *val; + g_autofree char *val = NULL; int index; index = object_property_get_int(obj, "bootindex", NULL); From 83b48a500f2f96f9403f7d255d9c58c623342542 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 4 Jul 2025 08:19:24 -0600 Subject: [PATCH 097/136] target/arm: Fix SME vs AdvSIMD exception priority We failed to raise an exception when sme_excp_el == 0 and fp_excp_el == 1. 
Cc: qemu-stable@nongnu.org Fixes: 3d74825f4d6 ("target/arm: Add SME enablement checks") Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20250704142112.1018902-2-richard.henderson@linaro.org Signed-off-by: Peter Maydell (cherry picked from commit f9b0f69304071384b12912bf9dd78e9ffd261cec) Signed-off-by: Michael Tokarev --- target/arm/tcg/translate-a64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index f6e88eb5f7..aebf313e38 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -1499,7 +1499,8 @@ bool sme_enabled_check(DisasContext *s) * to be zero when fp_excp_el has priority. This is because we need * sme_excp_el by itself for cpregs access checks. */ - if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) { + if (s->sme_excp_el + && (!s->fp_excp_el || s->sme_excp_el <= s->fp_excp_el)) { bool ret = sme_access_check(s); s->fp_access_checked = (ret ? 1 : -1); return ret; From 0e5f0d87f8e07d72688537282e19881cb9a323e6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 4 Jul 2025 08:19:25 -0600 Subject: [PATCH 098/136] target/arm: Fix sve_access_check for SME Do not assume SME implies SVE. Ensure that the non-streaming check is present along the SME path, since it is not implied by sme_*_enabled_check. 
Cc: qemu-stable@nongnu.org Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20250704142112.1018902-3-richard.henderson@linaro.org Signed-off-by: Peter Maydell (cherry picked from commit b4b2e070f41dd8774a70c6186141678558d79a38) Signed-off-by: Michael Tokarev --- target/arm/tcg/translate-a64.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index aebf313e38..8d3a8d7a25 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -1392,11 +1392,8 @@ static bool fp_access_check_only(DisasContext *s) return true; } -static bool fp_access_check(DisasContext *s) +static bool nonstreaming_check(DisasContext *s) { - if (!fp_access_check_only(s)) { - return false; - } if (s->sme_trap_nonstreaming && s->is_nonstreaming) { gen_exception_insn(s, 0, EXCP_UDEF, syn_smetrap(SME_ET_Streaming, false)); @@ -1405,6 +1402,11 @@ static bool fp_access_check(DisasContext *s) return true; } +static bool fp_access_check(DisasContext *s) +{ + return fp_access_check_only(s) && nonstreaming_check(s); +} + /* * Return <0 for non-supported element sizes, with MO_16 controlled by * FEAT_FP16; return 0 for fp disabled; otherwise return >0 for success. @@ -1455,14 +1457,24 @@ static int fp_access_check_vector_hsd(DisasContext *s, bool is_q, MemOp esz) */ bool sve_access_check(DisasContext *s) { - if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) { + if (dc_isar_feature(aa64_sme, s)) { bool ret; - assert(dc_isar_feature(aa64_sme, s)); - ret = sme_sm_enabled_check(s); + if (s->pstate_sm) { + ret = sme_enabled_check(s); + } else if (dc_isar_feature(aa64_sve, s)) { + goto continue_sve; + } else { + ret = sme_sm_enabled_check(s); + } + if (ret) { + ret = nonstreaming_check(s); + } s->sve_access_checked = (ret ? 1 : -1); return ret; } + + continue_sve: if (s->sve_excp_el) { /* Assert that we only raise one exception per instruction. 
*/ assert(!s->sve_access_checked); From c76ec8d575fb8194a59908d25828286702390903 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 4 Jul 2025 08:19:26 -0600 Subject: [PATCH 099/136] target/arm: Fix 128-bit element ZIP, UZP, TRN We missed the instructions UDEF when the vector size is too small. We missed marking the instructions non-streaming with SME. Cc: qemu-stable@nongnu.org Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20250704142112.1018902-4-richard.henderson@linaro.org Signed-off-by: Peter Maydell (cherry picked from commit e6ffd009c7710a8cc98094897fa0af609c114683) Signed-off-by: Michael Tokarev --- target/arm/tcg/translate-sve.c | 43 ++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index d23be477b4..40d3a032d6 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -2352,6 +2352,23 @@ TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p) *** SVE Permute - Interleaving Group */ +static bool do_interleave_q(DisasContext *s, gen_helper_gvec_3 *fn, + arg_rrr_esz *a, int data) +{ + if (sve_access_check(s)) { + unsigned vsz = vec_full_reg_size(s); + if (vsz < 32) { + unallocated_encoding(s); + } else { + tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + vsz, vsz, data, fn); + } + } + return true; +} + static gen_helper_gvec_3 * const zip_fns[4] = { gen_helper_sve_zip_b, gen_helper_sve_zip_h, gen_helper_sve_zip_s, gen_helper_sve_zip_d, @@ -2361,11 +2378,11 @@ TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz, TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz, zip_fns[a->esz], a, vec_full_reg_size(s) / 2) -TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, - gen_helper_sve2_zip_q, a, 0) -TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, - gen_helper_sve2_zip_q, a, - 
QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2) +TRANS_FEAT_NONSTREAMING(ZIP1_q, aa64_sve_f64mm, do_interleave_q, + gen_helper_sve2_zip_q, a, 0) +TRANS_FEAT_NONSTREAMING(ZIP2_q, aa64_sve_f64mm, do_interleave_q, + gen_helper_sve2_zip_q, a, + QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2) static gen_helper_gvec_3 * const uzp_fns[4] = { gen_helper_sve_uzp_b, gen_helper_sve_uzp_h, @@ -2377,10 +2394,10 @@ TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz, TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz, uzp_fns[a->esz], a, 1 << a->esz) -TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, - gen_helper_sve2_uzp_q, a, 0) -TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, - gen_helper_sve2_uzp_q, a, 16) +TRANS_FEAT_NONSTREAMING(UZP1_q, aa64_sve_f64mm, do_interleave_q, + gen_helper_sve2_uzp_q, a, 0) +TRANS_FEAT_NONSTREAMING(UZP2_q, aa64_sve_f64mm, do_interleave_q, + gen_helper_sve2_uzp_q, a, 16) static gen_helper_gvec_3 * const trn_fns[4] = { gen_helper_sve_trn_b, gen_helper_sve_trn_h, @@ -2392,10 +2409,10 @@ TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz, TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz, trn_fns[a->esz], a, 1 << a->esz) -TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, - gen_helper_sve2_trn_q, a, 0) -TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz, - gen_helper_sve2_trn_q, a, 16) +TRANS_FEAT_NONSTREAMING(TRN1_q, aa64_sve_f64mm, do_interleave_q, + gen_helper_sve2_trn_q, a, 0) +TRANS_FEAT_NONSTREAMING(TRN2_q, aa64_sve_f64mm, do_interleave_q, + gen_helper_sve2_trn_q, a, 16) /* *** SVE Permute Vector - Predicated Group From 9af1de0c4be21a37188af0a27b1b5c250004281c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 4 Jul 2025 08:19:30 -0600 Subject: [PATCH 100/136] target/arm: Fix PSEL size operands to tcg_gen_gvec_ands Gvec only operates on size 8 and multiples of 16. Predicates may be any multiple of 2. Round up the size using the appropriate function. 
Cc: qemu-stable@nongnu.org Fixes: 598ab0b24c0 ("target/arm: Implement PSEL") Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20250704142112.1018902-8-richard.henderson@linaro.org Signed-off-by: Peter Maydell (cherry picked from commit 3801c5b75ffc60957265513338e8fd5f8b6ce8a1) Signed-off-by: Michael Tokarev --- target/arm/tcg/translate-sve.c | 1 + 1 file changed, 1 insertion(+) diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index 40d3a032d6..b6fa0b67b1 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -7282,6 +7282,7 @@ static bool trans_PSEL(DisasContext *s, arg_psel *a) tcg_gen_neg_i64(tmp, tmp); /* Apply to either copy the source, or write zeros. */ + pl = size_for_gvec(pl); tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd), pred_full_reg_offset(s, a->pn), tmp, pl, pl); return true; From 9a98db505f20d2e221ef086423483e028ed11937 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 4 Jul 2025 08:19:31 -0600 Subject: [PATCH 101/136] target/arm: Fix f16_dotadd vs nan selection Implement FPProcessNaNs4 within f16_dotadd, rather than simply letting NaNs propagate through the function. 
Cc: qemu-stable@nongnu.org Fixes: 3916841ac75 ("target/arm: Implement FMOPA, FMOPS (widening)") Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20250704142112.1018902-9-richard.henderson@linaro.org Signed-off-by: Peter Maydell (cherry picked from commit cfc688c00ade84f6b32c7814b52c217f1d3b5eb1) Signed-off-by: Michael Tokarev --- target/arm/tcg/sme_helper.c | 62 +++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c index dcc48e43db..a4992301b1 100644 --- a/target/arm/tcg/sme_helper.c +++ b/target/arm/tcg/sme_helper.c @@ -1005,25 +1005,55 @@ static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2, * - we have pre-set-up copy of s_std which is set to round-to-odd, * for the multiply (see below) */ - float64 e1r = float16_to_float64(e1 & 0xffff, true, s_f16); - float64 e1c = float16_to_float64(e1 >> 16, true, s_f16); - float64 e2r = float16_to_float64(e2 & 0xffff, true, s_f16); - float64 e2c = float16_to_float64(e2 >> 16, true, s_f16); - float64 t64; + float16 h1r = e1 & 0xffff; + float16 h1c = e1 >> 16; + float16 h2r = e2 & 0xffff; + float16 h2c = e2 >> 16; float32 t32; - /* - * The ARM pseudocode function FPDot performs both multiplies - * and the add with a single rounding operation. Emulate this - * by performing the first multiply in round-to-odd, then doing - * the second multiply as fused multiply-add, and rounding to - * float32 all in one step. - */ - t64 = float64_mul(e1r, e2r, s_odd); - t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std); + /* C.f. 
FPProcessNaNs4 */ + if (float16_is_any_nan(h1r) || float16_is_any_nan(h1c) || + float16_is_any_nan(h2r) || float16_is_any_nan(h2c)) { + float16 t16; + + if (float16_is_signaling_nan(h1r, s_f16)) { + t16 = h1r; + } else if (float16_is_signaling_nan(h1c, s_f16)) { + t16 = h1c; + } else if (float16_is_signaling_nan(h2r, s_f16)) { + t16 = h2r; + } else if (float16_is_signaling_nan(h2c, s_f16)) { + t16 = h2c; + } else if (float16_is_any_nan(h1r)) { + t16 = h1r; + } else if (float16_is_any_nan(h1c)) { + t16 = h1c; + } else if (float16_is_any_nan(h2r)) { + t16 = h2r; + } else { + t16 = h2c; + } + t32 = float16_to_float32(t16, true, s_f16); + } else { + float64 e1r = float16_to_float64(h1r, true, s_f16); + float64 e1c = float16_to_float64(h1c, true, s_f16); + float64 e2r = float16_to_float64(h2r, true, s_f16); + float64 e2c = float16_to_float64(h2c, true, s_f16); + float64 t64; - /* This conversion is exact, because we've already rounded. */ - t32 = float64_to_float32(t64, s_std); + /* + * The ARM pseudocode function FPDot performs both multiplies + * and the add with a single rounding operation. Emulate this + * by performing the first multiply in round-to-odd, then doing + * the second multiply as fused multiply-add, and rounding to + * float32 all in one step. + */ + t64 = float64_mul(e1r, e2r, s_odd); + t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std); + + /* This conversion is exact, because we've already rounded. */ + t32 = float64_to_float32(t64, s_std); + } /* The final accumulation step is not fused. */ return float32_add(sum, t32, s_std); From 005184da9d95f81ea6022e3f2e1dd95a881948be Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 4 Jul 2025 08:19:32 -0600 Subject: [PATCH 102/136] target/arm: Fix bfdotadd_ebf vs nan selection Implement FPProcessNaNs4 within bfdotadd_ebf, rather than simply letting NaNs propagate through the function. 
Cc: qemu-stable@nongnu.org Fixes: 0e1850182a1 ("target/arm: Implement FPCR.EBF=1 semantics for bfdotadd()") Signed-off-by: Richard Henderson Reviewed-by: Peter Maydell Message-id: 20250704142112.1018902-10-richard.henderson@linaro.org Signed-off-by: Peter Maydell (cherry picked from commit bf020eaa6741711902a425016e2c7585f222562d) Signed-off-by: Michael Tokarev --- target/arm/tcg/vec_helper.c | 75 ++++++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 22 deletions(-) diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c index 986eaf8ffa..3b7f308803 100644 --- a/target/arm/tcg/vec_helper.c +++ b/target/arm/tcg/vec_helper.c @@ -2989,31 +2989,62 @@ float32 bfdotadd(float32 sum, uint32_t e1, uint32_t e2, float_status *fpst) float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, float_status *fpst, float_status *fpst_odd) { - /* - * Compare f16_dotadd() in sme_helper.c, but here we have - * bfloat16 inputs. In particular that means that we do not - * want the FPCR.FZ16 flush semantics, so we use the normal - * float_status for the input handling here. - */ - float64 e1r = float32_to_float64(e1 << 16, fpst); - float64 e1c = float32_to_float64(e1 & 0xffff0000u, fpst); - float64 e2r = float32_to_float64(e2 << 16, fpst); - float64 e2c = float32_to_float64(e2 & 0xffff0000u, fpst); - float64 t64; + float32 s1r = e1 << 16; + float32 s1c = e1 & 0xffff0000u; + float32 s2r = e2 << 16; + float32 s2c = e2 & 0xffff0000u; float32 t32; - /* - * The ARM pseudocode function FPDot performs both multiplies - * and the add with a single rounding operation. Emulate this - * by performing the first multiply in round-to-odd, then doing - * the second multiply as fused multiply-add, and rounding to - * float32 all in one step. - */ - t64 = float64_mul(e1r, e2r, fpst_odd); - t64 = float64r32_muladd(e1c, e2c, t64, 0, fpst); + /* C.f. 
FPProcessNaNs4 */ + if (float32_is_any_nan(s1r) || float32_is_any_nan(s1c) || + float32_is_any_nan(s2r) || float32_is_any_nan(s2c)) { + if (float32_is_signaling_nan(s1r, fpst)) { + t32 = s1r; + } else if (float32_is_signaling_nan(s1c, fpst)) { + t32 = s1c; + } else if (float32_is_signaling_nan(s2r, fpst)) { + t32 = s2r; + } else if (float32_is_signaling_nan(s2c, fpst)) { + t32 = s2c; + } else if (float32_is_any_nan(s1r)) { + t32 = s1r; + } else if (float32_is_any_nan(s1c)) { + t32 = s1c; + } else if (float32_is_any_nan(s2r)) { + t32 = s2r; + } else { + t32 = s2c; + } + /* + * FPConvertNaN(FPProcessNaN(t32)) will be done as part + * of the final addition below. + */ + } else { + /* + * Compare f16_dotadd() in sme_helper.c, but here we have + * bfloat16 inputs. In particular that means that we do not + * want the FPCR.FZ16 flush semantics, so we use the normal + * float_status for the input handling here. + */ + float64 e1r = float32_to_float64(s1r, fpst); + float64 e1c = float32_to_float64(s1c, fpst); + float64 e2r = float32_to_float64(s2r, fpst); + float64 e2c = float32_to_float64(s2c, fpst); + float64 t64; - /* This conversion is exact, because we've already rounded. */ - t32 = float64_to_float32(t64, fpst); + /* + * The ARM pseudocode function FPDot performs both multiplies + * and the add with a single rounding operation. Emulate this + * by performing the first multiply in round-to-odd, then doing + * the second multiply as fused multiply-add, and rounding to + * float32 all in one step. + */ + t64 = float64_mul(e1r, e2r, fpst_odd); + t64 = float64r32_muladd(e1c, e2c, t64, 0, fpst); + + /* This conversion is exact, because we've already rounded. */ + t32 = float64_to_float32(t64, fpst); + } /* The final accumulation step is not fused. 
*/ return float32_add(sum, t32, fpst); From cae6ddfd7e6297d728ab6678d69e721fc38d345d Mon Sep 17 00:00:00 2001 From: Mark Cave-Ayland Date: Wed, 11 Jun 2025 14:03:15 +0100 Subject: [PATCH 103/136] target/i386: fix TB exit logic in gen_movl_seg() when writing to SS Before commit e54ef98c8a ("target/i386: do not trigger IRQ shadow for LSS"), any write to SS in gen_movl_seg() would cause a TB exit. The changes introduced by this commit were intended to restrict the DISAS_EOB_INHIBIT_IRQ exit to the case where inhibit_irq is true, but missed that a DISAS_EOB_NEXT exit can still be required when writing to SS and inhibit_irq is false. Comparing the PE(s) && !VM86(s) section with the logic in x86_update_hflags(), we can see that the DISAS_EOB_NEXT exit is still required for the !CODE32 case when writing to SS in gen_movl_seg() because any change to the SS flags can affect hflags. Similarly we can see that the existing CODE32 case is still correct since a change to any of DS, ES and SS can affect hflags. Finally for the gen_op_movl_seg_real() case an explicit TB exit is not needed because the segment register selector does not affect hflags. Update the logic in gen_movl_seg() so that a write to SS with inhibit_irq set to false where PE(s) && !VM86(s) will generate a DISAS_EOB_NEXT exit along with the inline comment. This has the effect of allowing Win98SE to boot in QEMU once again. 
Signed-off-by: Mark Cave-Ayland Fixes: e54ef98c8a ("target/i386: do not trigger IRQ shadow for LSS") Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2987 Link: https://lore.kernel.org/r/20250611130315.383151-1-mark.cave-ayland@ilande.co.uk Reviewed-by: Peter Maydell Signed-off-by: Paolo Bonzini (cherry picked from commit 0f1d6606c28d0ae81a1b311972c5c54e5e867bf0) Fixes: 0f1d6606c2 ("target/i386: do not trigger IRQ shadow for LSS" in 10.0.x) Signed-off-by: Michael Tokarev --- target/i386/tcg/translate.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 50cf56175f..7e590ef79c 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2000,8 +2000,11 @@ static void gen_movl_seg(DisasContext *s, X86Seg seg_reg, TCGv src, bool inhibit tcg_gen_trunc_tl_i32(sel, src); gen_helper_load_seg(tcg_env, tcg_constant_i32(seg_reg), sel); - /* For move to DS/ES/SS, the addseg or ss32 flags may change. */ - if (CODE32(s) && seg_reg < R_FS) { + /* + * For moves to SS, the SS32 flag may change. For CODE32 only, changes + * to SS, DS and ES may change the ADDSEG flags. + */ + if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) { s->base.is_jmp = DISAS_EOB_NEXT; } } else { From fe000c4648b37782f4981fc386fec670304404a9 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 4 Jul 2025 17:56:36 +0100 Subject: [PATCH 104/136] target/arm: Don't enforce NSE,NS check for EL3->EL3 returns In the Arm ARM, rule R_TYTWB that defines illegal exception return cases includes the case: If FEAT_RME is implemented, then if SCR_EL3.{NSE, NS} is {1, 0}, an exception return from EL3 to a lower Exception level Our implementation of this check fails to check that the return is to a lower exception level, so it will incorrectly fire on EL3->EL3 exception returns. Fix the check condition. This requires us to move it further down in the function to a point where we know the new_el value. 
Fixes: 35aa6715ddcd9 ("target/arm: Catch illegal-exception-return from EL3 with bad NSE/NS") Cc: qemu-stable@nongnu.org Resolves: https://gitlab.com/qemu-project/qemu/-/issues/3016 Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20250704165636.261888-1-peter.maydell@linaro.org (cherry picked from commit c563cd7e61d074f58eef413322144461dd243716) Signed-off-by: Michael Tokarev --- target/arm/tcg/helper-a64.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c index 9244848efe..be0935d194 100644 --- a/target/arm/tcg/helper-a64.c +++ b/target/arm/tcg/helper-a64.c @@ -653,15 +653,6 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) spsr &= ~PSTATE_SS; } - /* - * FEAT_RME forbids return from EL3 with an invalid security state. - * We don't need an explicit check for FEAT_RME here because we enforce - * in scr_write() that you can't set the NSE bit without it. - */ - if (cur_el == 3 && (env->cp15.scr_el3 & (SCR_NS | SCR_NSE)) == SCR_NSE) { - goto illegal_return; - } - new_el = el_from_spsr(spsr); if (new_el == -1) { goto illegal_return; @@ -673,6 +664,17 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) goto illegal_return; } + /* + * FEAT_RME forbids return from EL3 to a lower exception level + * with an invalid security state. + * We don't need an explicit check for FEAT_RME here because we enforce + * in scr_write() that you can't set the NSE bit without it. 
+ */ + if (cur_el == 3 && new_el < 3 && + (env->cp15.scr_el3 & (SCR_NS | SCR_NSE)) == SCR_NSE) { + goto illegal_return; + } + if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) { /* Return to an EL which is configured for a different register width */ goto illegal_return; From 676bc0f4a726627598e6f1d5979ee863191b54ba Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sun, 29 Jun 2025 22:48:50 +0200 Subject: [PATCH 105/136] hw/arm/fsl-imx8mp: Wire VIRQ and VFIQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows to run KVM guests inside the imx8mp-evk machine. Fixes: a4eefc69b237 ("hw/arm: Add i.MX 8M Plus EVK board") CC: qemu-stable Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Peter Maydell (cherry picked from commit 930180f3b9a292639eb894f1ca846683834ed4b7) Signed-off-by: Michael Tokarev --- hw/arm/fsl-imx8mp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/arm/fsl-imx8mp.c b/hw/arm/fsl-imx8mp.c index 82edf61082..d775aa934a 100644 --- a/hw/arm/fsl-imx8mp.c +++ b/hw/arm/fsl-imx8mp.c @@ -356,6 +356,10 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp) qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); sysbus_connect_irq(gicsbd, i + ms->smp.cpus, qdev_get_gpio_in(cpudev, ARM_CPU_FIQ)); + sysbus_connect_irq(gicsbd, i + 2 * ms->smp.cpus, + qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ)); + sysbus_connect_irq(gicsbd, i + 3 * ms->smp.cpus, + qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ)); } } From 1714828a56f150327cc05b242cabba4c3f891677 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 10 Jul 2025 12:31:23 +0100 Subject: [PATCH 106/136] linux-user: Implement fchmodat2 syscall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fchmodat2 syscall is new from Linux 6.6; it is like the existing fchmodat syscall except that it takes a flags parameter. 
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/3019 Signed-off-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Signed-off-by: Richard Henderson Message-ID: <20250710113123.1109461-1-peter.maydell@linaro.org> (cherry picked from commit 6a3e132a1be8c9e649967a4eb341d00731be7f51) Signed-off-by: Michael Tokarev --- linux-user/syscall.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 8bfe4912e1..9b397bac7e 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -789,6 +789,10 @@ safe_syscall6(ssize_t, copy_file_range, int, infd, loff_t *, pinoff, int, outfd, loff_t *, poutoff, size_t, length, unsigned int, flags) #endif +#if defined(TARGET_NR_fchmodat2) && defined(__NR_fchmodat2) +safe_syscall4(int, fchmodat2, int, dfd, const char *, filename, + unsigned short, mode, unsigned int, flags) +#endif /* We do ioctl like this rather than via safe_syscall3 to preserve the * "third argument might be integer or pointer or not present" behaviour of @@ -10709,6 +10713,15 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1, ret = get_errno(fchmodat(arg1, p, arg3, 0)); unlock_user(p, arg2, 0); return ret; +#endif +#if defined(TARGET_NR_fchmodat2) && defined(__NR_fchmodat2) + case TARGET_NR_fchmodat2: + if (!(p = lock_user_string(arg2))) { + return -TARGET_EFAULT; + } + ret = get_errno(safe_fchmodat2(arg1, p, arg3, arg4)); + unlock_user(p, arg2, 0); + return ret; #endif case TARGET_NR_getpriority: /* Note that negative values are valid for getpriority, so we must From 1759558915c5052f560737061ca6333a16a924f5 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 10 Jul 2025 17:43:54 +0100 Subject: [PATCH 107/136] linux-user: Check for EFAULT failure in nanosleep target_to_host_timespec() returns an error if the memory the guest passed us isn't actually readable. 
We check for this everywhere except the callsite in the TARGET_NR_nanosleep case, so this mistake was caught by a Coverity heuristic. Add the missing error checks to the calls that convert between the host and target timespec structs. Coverity: CID 1507104 Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Signed-off-by: Richard Henderson Message-ID: <20250710164355.1296648-1-peter.maydell@linaro.org> (cherry picked from commit c4828cb8502d0b2adc39b9cde93df7d2886df897) Signed-off-by: Michael Tokarev --- linux-user/syscall.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 9b397bac7e..a8eea5dd52 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -11639,10 +11639,14 @@ static abi_long do_syscall1(CPUArchState *cpu_env, int num, abi_long arg1, case TARGET_NR_nanosleep: { struct timespec req, rem; - target_to_host_timespec(&req, arg1); + if (target_to_host_timespec(&req, arg1)) { + return -TARGET_EFAULT; + } ret = get_errno(safe_nanosleep(&req, &rem)); if (is_error(ret) && arg2) { - host_to_target_timespec(arg2, &rem); + if (host_to_target_timespec(arg2, &rem)) { + return -TARGET_EFAULT; + } } } return ret; From a4e31f5d8ec90a16a250fc39d807f79ce9d99760 Mon Sep 17 00:00:00 2001 From: Geoffrey Thomas Date: Fri, 14 Mar 2025 08:47:42 -0400 Subject: [PATCH 108/136] linux-user: Hold the fd-trans lock across fork If another thread is holding target_fd_trans_lock during a fork, then the lock becomes permanently locked in the child and the emulator deadlocks at the next interaction with the fd-trans table. As with other locks, acquire the lock in fork_start() and release it in fork_end(). Cc: qemu-stable@nongnu.org Signed-off-by: Geoffrey Thomas Fixes: c093364f4d91 "fd-trans: Fix race condition on reallocation of the translation table." 
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2846 Buglink: https://github.com/astral-sh/uv/issues/6105 Reviewed-by: Richard Henderson Signed-off-by: Richard Henderson Message-ID: <20250314124742.4965-1-geofft@ldpreload.com> (cherry picked from commit e4e839b2eeea5745c48ce47144c7842eb7cd455f) Signed-off-by: Michael Tokarev --- linux-user/fd-trans.h | 10 ++++++++++ linux-user/main.c | 2 ++ 2 files changed, 12 insertions(+) diff --git a/linux-user/fd-trans.h b/linux-user/fd-trans.h index 910faaf237..e14f96059c 100644 --- a/linux-user/fd-trans.h +++ b/linux-user/fd-trans.h @@ -36,6 +36,16 @@ static inline void fd_trans_init(void) qemu_mutex_init(&target_fd_trans_lock); } +static inline void fd_trans_prefork(void) +{ + qemu_mutex_lock(&target_fd_trans_lock); +} + +static inline void fd_trans_postfork(void) +{ + qemu_mutex_unlock(&target_fd_trans_lock); +} + static inline TargetFdDataFunc fd_trans_target_to_host_data(int fd) { if (fd < 0) { diff --git a/linux-user/main.c b/linux-user/main.c index e2ec5970be..2cd867491b 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -149,12 +149,14 @@ void fork_start(void) cpu_list_lock(); qemu_plugin_user_prefork_lock(); gdbserver_fork_start(); + fd_trans_prefork(); } void fork_end(pid_t pid) { bool child = pid == 0; + fd_trans_postfork(); qemu_plugin_user_postfork(child); mmap_fork_end(child); if (child) { From b4ead907261c0a7228e43feb087c3b543da3620f Mon Sep 17 00:00:00 2001 From: "Chaney, Ben" Date: Mon, 16 Jun 2025 20:56:50 +0000 Subject: [PATCH 109/136] migration: Don't sync volatile memory after migration completes Syncing volatile memory provides no benefit, instead it can cause performance issues in some cases. Only sync memory that is marked as non-volatile after migration completes on destination. 
Signed-off-by: Ben Chaney Fixes: bd108a44bc29 (migration: ram: Switch to ram block writeback) Link: https://lore.kernel.org/r/1CC43F59-336F-4A12-84AD-DB89E0A17A95@akamai.com Signed-off-by: Peter Xu (cherry picked from commit 983899eab4939dc4dff67fa4d822c5b4df7eae21) Signed-off-by: Michael Tokarev --- migration/ram.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/migration/ram.c b/migration/ram.c index 424df6d9f1..a0784d99b2 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3598,7 +3598,9 @@ static int ram_load_cleanup(void *opaque) RAMBlock *rb; RAMBLOCK_FOREACH_NOT_IGNORED(rb) { - qemu_ram_block_writeback(rb); + if (memory_region_is_nonvolatile(rb->mr)) { + qemu_ram_block_writeback(rb); + } } xbzrle_load_cleanup(); From c49db93c360762587daf67a274a17749bc60ebe1 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 11 Jul 2025 15:12:17 +0100 Subject: [PATCH 110/136] linux-user: Use qemu_set_cloexec() to mark pidfd as FD_CLOEXEC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the linux-user do_fork() function we try to set the FD_CLOEXEC flag on a pidfd like this: fcntl(pid_fd, F_SETFD, fcntl(pid_fd, F_GETFL) | FD_CLOEXEC); This has two problems: (1) it doesn't check errors, which Coverity complains about (2) we use F_GETFL when we mean F_GETFD Deal with both of these problems by using qemu_set_cloexec() instead. That function will assert() if the fcntls fail, which is fine (we are inside fork_start()/fork_end() so we know nothing can mess around with our file descriptors here, and we just got this one from pidfd_open()). (As we are touching the if() statement here, we correct the indentation.) Coverity: CID 1508111 Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Reviewed-by: Daniel P. 
Berrangé Signed-off-by: Richard Henderson Message-ID: <20250711141217.1429412-1-peter.maydell@linaro.org> (cherry picked from commit d6390204c61e148488f034d1f79be35cd3318d93) Signed-off-by: Michael Tokarev --- linux-user/syscall.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index a8eea5dd52..3a25abfaca 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -6746,10 +6746,9 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, int pid_child = ret; pid_fd = pidfd_open(pid_child, 0); if (pid_fd >= 0) { - fcntl(pid_fd, F_SETFD, fcntl(pid_fd, F_GETFL) - | FD_CLOEXEC); + qemu_set_cloexec(pid_fd); } else { - pid_fd = 0; + pid_fd = 0; } #endif put_user_u32(pid_fd, parent_tidptr); From a655b6548fe2196031a3f325c5a4df7df65ff4cd Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 2 Jul 2025 08:03:19 +0200 Subject: [PATCH 111/136] accel/kvm: Adjust the note about the minimum required kernel version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 126e7f78036 ("kvm: require KVM_CAP_IOEVENTFD and KVM_CAP_IOEVENTFD_ANY_LENGTH") we require at least kernel 4.5 to be able to use KVM. Adjust the upgrade_note accordingly. While we're at it, remove the text about kvm-kmod and the SourceForge URL since this is not actively maintained anymore. 
Fixes: 126e7f78036 ("kvm: require KVM_CAP_IOEVENTFD and KVM_CAP_IOEVENTFD_ANY_LENGTH") Signed-off-by: Thomas Huth Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael Tokarev Signed-off-by: Michael Tokarev (cherry picked from commit f180e367fce44b336105a11a62edf9610b6b2a06) --- accel/kvm/kvm-all.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 951e8214e0..d6002b631e 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2556,8 +2556,7 @@ static int kvm_init(MachineState *ms) { MachineClass *mc = MACHINE_GET_CLASS(ms); static const char upgrade_note[] = - "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n" - "(see http://sourceforge.net/projects/kvm).\n"; + "Please upgrade to at least kernel 4.5.\n"; const struct { const char *name; int num; From 6624ff39720c954081706822da15bc806ea4c318 Mon Sep 17 00:00:00 2001 From: Anastasia Belova Date: Mon, 2 Jun 2025 11:57:17 +0300 Subject: [PATCH 112/136] net: fix buffer overflow in af_xdp_umem_create() s->pool has n_descs elements so maximum i should be n_descs - 1. Fix the upper bound. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: cb039ef3d9 ("net: add initial support for AF_XDP network backend") Cc: qemu-stable@nongnu.org Reviewed-by: Ilya Maximets Signed-off-by: Anastasia Belova Signed-off-by: Jason Wang (cherry picked from commit 110d0fa2d4d1f754242f6775baec43776a9adb35) Signed-off-by: Michael Tokarev --- net/af-xdp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/af-xdp.c b/net/af-xdp.c index 01c5fb914e..d022534d76 100644 --- a/net/af-xdp.c +++ b/net/af-xdp.c @@ -323,7 +323,7 @@ static int af_xdp_umem_create(AFXDPState *s, int sock_fd, Error **errp) s->pool = g_new(uint64_t, n_descs); /* Fill the pool in the opposite order, because it's a LIFO queue. 
*/ - for (i = n_descs; i >= 0; i--) { + for (i = n_descs - 1; i >= 0; i--) { s->pool[i] = i * XSK_UMEM__DEFAULT_FRAME_SIZE; } s->n_pool = n_descs; From f272f318c02a4e41a91f1c87d677c44427a2ed89 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Fri, 30 May 2025 14:18:53 +0900 Subject: [PATCH 113/136] virtio-net: Add queues for RSS during migration virtio_net_pre_load_queues() inspects vdev->guest_features to tell if VIRTIO_NET_F_RSS or VIRTIO_NET_F_MQ is enabled to infer the required number of queues. This works for VIRTIO_NET_F_MQ but it doesn't for VIRTIO_NET_F_RSS because only the lowest 32 bits of vdev->guest_features are set at that point and VIRTIO_NET_F_RSS uses bit 60 while VIRTIO_NET_F_MQ uses bit 22. Instead of inferring the required number of queues from vdev->guest_features, use the number loaded from the vm state. This change also has a nice side effect to remove a duplicate peer queue pair change by circumventing virtio_net_set_multiqueue(). Also update the comment in include/hw/virtio/virtio.h to prevent an implementation of pre_load_queues() from referring to any fields being loaded during migration by accident in the future.
Fixes: 8c49756825da ("virtio-net: Add only one queue pair when realizing") Tested-by: Lei Yang Cc: qemu-stable@nongnu.org Signed-off-by: Akihiko Odaki Signed-off-by: Jason Wang (cherry picked from commit adda0ad56bd28d5a809051cbd190fda5798ec4e4) Signed-off-by: Michael Tokarev --- hw/net/virtio-net.c | 11 ++++------- hw/virtio/virtio.c | 14 +++++++------- include/hw/virtio/virtio.h | 10 ++++++++-- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index bd37651dab..5f908e5bca 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -3021,11 +3021,10 @@ static void virtio_net_del_queue(VirtIONet *n, int index) virtio_del_queue(vdev, index * 2 + 1); } -static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs) +static void virtio_net_change_num_queues(VirtIONet *n, int new_num_queues) { VirtIODevice *vdev = VIRTIO_DEVICE(n); int old_num_queues = virtio_get_num_queues(vdev); - int new_num_queues = new_max_queue_pairs * 2 + 1; int i; assert(old_num_queues >= 3); @@ -3061,16 +3060,14 @@ static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) int max = multiqueue ? 
n->max_queue_pairs : 1; n->multiqueue = multiqueue; - virtio_net_change_num_queue_pairs(n, max); + virtio_net_change_num_queues(n, max * 2 + 1); virtio_net_set_queue_pairs(n); } -static int virtio_net_pre_load_queues(VirtIODevice *vdev) +static int virtio_net_pre_load_queues(VirtIODevice *vdev, uint32_t n) { - virtio_net_set_multiqueue(VIRTIO_NET(vdev), - virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_RSS) || - virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MQ)); + virtio_net_change_num_queues(VIRTIO_NET(vdev), n); return 0; } diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 755260981e..ec54573feb 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -3257,13 +3257,6 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) config_len--; } - if (vdc->pre_load_queues) { - ret = vdc->pre_load_queues(vdev); - if (ret) { - return ret; - } - } - num = qemu_get_be32(f); if (num > VIRTIO_QUEUE_MAX) { @@ -3271,6 +3264,13 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) return -1; } + if (vdc->pre_load_queues) { + ret = vdc->pre_load_queues(vdev, num); + if (ret) { + return ret; + } + } + for (i = 0; i < num; i++) { vdev->vq[i].vring.num = qemu_get_be32(f); if (k->has_variable_vring_alignment) { diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 6386910280..14c2afed33 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -210,8 +210,14 @@ struct VirtioDeviceClass { void (*guest_notifier_mask)(VirtIODevice *vdev, int n, bool mask); int (*start_ioeventfd)(VirtIODevice *vdev); void (*stop_ioeventfd)(VirtIODevice *vdev); - /* Called before loading queues. Useful to add queues before loading. */ - int (*pre_load_queues)(VirtIODevice *vdev); + /* + * Called before loading queues. + * If the number of queues change at runtime, use @n to know the + * number and add or remove queues accordingly. 
+ * Note that this function is called in the middle of loading vmsd; + * no assumption should be made on states being loaded from vmsd. + */ + int (*pre_load_queues)(VirtIODevice *vdev, uint32_t n); /* Saving and loading of a device; trying to deprecate save/load * use vmsd for new devices. */ From 10a9eedc2049ae977aadd77c973ef4f8522b01a2 Mon Sep 17 00:00:00 2001 From: Alejandro Jimenez Date: Tue, 17 Jun 2025 15:04:20 +0000 Subject: [PATCH 114/136] amd_iommu: Fix Miscellaneous Information Register 0 encoding The definitions encoding the maximum Virtual, Physical, and Guest Virtual Address sizes supported by the IOMMU are using incorrect offsets i.e. the VASize and GVASize offsets are switched. The value in the GVAsize field is also modified, since it was incorrectly encoded. Cc: qemu-stable@nongnu.org Fixes: d29a09ca6842 ("hw/i386: Introduce AMD IOMMU") Co-developed-by: Ethan MILON Signed-off-by: Ethan MILON Signed-off-by: Alejandro Jimenez Message-Id: <20250617150427.20585-2-alejandro.j.jimenez@oracle.com> Reviewed-by: Vasant Hegde Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin (cherry picked from commit 091c7d7924f33781c2fb8e7297dc54971e0c3785) Signed-off-by: Michael Tokarev --- hw/i386/amd_iommu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index 28125130c6..921f7e1a4f 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -196,9 +196,9 @@ #define AMDVI_PAGE_SHIFT_4K 12 #define AMDVI_PAGE_MASK_4K (~((1ULL << AMDVI_PAGE_SHIFT_4K) - 1)) -#define AMDVI_MAX_VA_ADDR (48UL << 5) -#define AMDVI_MAX_PH_ADDR (40UL << 8) -#define AMDVI_MAX_GVA_ADDR (48UL << 15) +#define AMDVI_MAX_GVA_ADDR (2UL << 5) +#define AMDVI_MAX_PH_ADDR (40UL << 8) +#define AMDVI_MAX_VA_ADDR (48UL << 15) /* Completion Wait data size */ #define AMDVI_COMPLETION_DATA_SIZE 8 From 1ca9d2e0c2916174e50e6dd9fdce783ff1f33bdb Mon Sep 17 00:00:00 2001 From: Alejandro Jimenez Date: Tue, 17 Jun 2025 15:04:21 +0000 Subject: [PATCH 115/136] amd_iommu: Fix Device ID decoding for INVALIDATE_IOTLB_PAGES command The DeviceID bits are extracted using an incorrect offset in the call to amdvi_iotlb_remove_page(). This field is read (correctly) earlier, so use the value already retrieved for devid. Cc: qemu-stable@nongnu.org Fixes: d29a09ca6842 ("hw/i386: Introduce AMD IOMMU") Signed-off-by: Alejandro Jimenez Reviewed-by: Vasant Hegde Message-Id: <20250617150427.20585-3-alejandro.j.jimenez@oracle.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin (cherry picked from commit c63b8d1425ba8b3b08ee4f7346457fd8a7f12a24) Signed-off-by: Michael Tokarev --- hw/i386/amd_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index af85706b8a..de55074b21 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -508,7 +508,7 @@ static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd) static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd) { - uint16_t devid = extract64(cmd[0], 0, 16); + uint16_t devid = cpu_to_le16(extract64(cmd[0], 0, 16)); if (extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) || extract64(cmd[1], 6, 6)) { amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4), @@ -521,7 +521,7 @@ static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd) &devid); } else { amdvi_iotlb_remove_page(s, cpu_to_le64(extract64(cmd[1], 12, 52)) << 12, - cpu_to_le16(extract64(cmd[1], 0, 16))); + devid); } trace_amdvi_iotlb_inval(); } From d1ea4a1b0effc5f6658688bc7a5c17a245b78fb5 Mon Sep 17 00:00:00 2001 From: Alejandro Jimenez Date: Tue, 17 Jun 2025 15:04:22 +0000 Subject: [PATCH 116/136] amd_iommu: Update bitmasks representing DTE reserved fields The DTE validation method verifies that all bits in reserved DTE fields are unset. Update them according to the latest definition available in AMD I/O Virtualization Technology (IOMMU) Specification - Section 2.2.2.1 Device Table Entry Format. Remove the magic numbers and use a macro helper to generate bitmasks covering the specified ranges for better legibility. Note that some reserved fields specify that events are generated when they contain non-zero bits, or checks are skipped under certain configurations. This change only updates the reserved masks, checks for special conditions are not yet implemented. Cc: qemu-stable@nongnu.org Signed-off-by: Alejandro Jimenez Reviewed-by: Vasant Hegde Message-Id: <20250617150427.20585-4-alejandro.j.jimenez@oracle.com> Reviewed-by: Michael S. 
Tsirkin Signed-off-by: Michael S. Tsirkin (cherry picked from commit ff3dcb3bf652912466dcc1cd10d3267f185c212e) Signed-off-by: Michael Tokarev --- hw/i386/amd_iommu.c | 7 ++++--- hw/i386/amd_iommu.h | 9 ++++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index de55074b21..69d69a9b2d 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -848,9 +848,10 @@ static inline uint64_t amdvi_get_perms(uint64_t entry) static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid, uint64_t *dte) { - if ((dte[0] & AMDVI_DTE_LOWER_QUAD_RESERVED) - || (dte[1] & AMDVI_DTE_MIDDLE_QUAD_RESERVED) - || (dte[2] & AMDVI_DTE_UPPER_QUAD_RESERVED) || dte[3]) { + if ((dte[0] & AMDVI_DTE_QUAD0_RESERVED) || + (dte[1] & AMDVI_DTE_QUAD1_RESERVED) || + (dte[2] & AMDVI_DTE_QUAD2_RESERVED) || + (dte[3] & AMDVI_DTE_QUAD3_RESERVED)) { amdvi_log_illegaldevtab_error(s, devid, s->devtab + devid * AMDVI_DEVTAB_ENTRY_SIZE, 0); diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index 921f7e1a4f..60af62bef6 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -25,6 +25,8 @@ #include "hw/i386/x86-iommu.h" #include "qom/object.h" +#define GENMASK64(h, l) (((~0ULL) >> (63 - (h) + (l))) << (l)) + /* Capability registers */ #define AMDVI_CAPAB_BAR_LOW 0x04 #define AMDVI_CAPAB_BAR_HIGH 0x08 @@ -162,9 +164,10 @@ #define AMDVI_FEATURE_PC (1ULL << 9) /* Perf counters */ /* reserved DTE bits */ -#define AMDVI_DTE_LOWER_QUAD_RESERVED 0x80300000000000fc -#define AMDVI_DTE_MIDDLE_QUAD_RESERVED 0x0000000000000100 -#define AMDVI_DTE_UPPER_QUAD_RESERVED 0x08f0000000000000 +#define AMDVI_DTE_QUAD0_RESERVED (GENMASK64(6, 2) | GENMASK64(63, 63)) +#define AMDVI_DTE_QUAD1_RESERVED 0 +#define AMDVI_DTE_QUAD2_RESERVED GENMASK64(53, 52) +#define AMDVI_DTE_QUAD3_RESERVED (GENMASK64(14, 0) | GENMASK64(53, 48)) /* AMDVI paging mode */ #define AMDVI_GATS_MODE (2ULL << 12) From caaa64850876a71c46ba0a3c9f5925e8cc49c600 Mon Sep 17 00:00:00 2001 From: 
Alejandro Jimenez Date: Tue, 17 Jun 2025 15:04:23 +0000 Subject: [PATCH 117/136] amd_iommu: Fix masks for various IOMMU MMIO Registers Address various issues with definitions of the MMIO registers e.g. for the Device Table Address Register, the size mask currently encompasses reserved bits [11:9], so change it to only extract the bits [8:0] encoding size. Convert masks to use GENMASK64 for consistency, and make unrelated definitions independent. Cc: qemu-stable@nongnu.org Fixes: d29a09ca6842 ("hw/i386: Introduce AMD IOMMU") Signed-off-by: Alejandro Jimenez Reviewed-by: Vasant Hegde Message-Id: <20250617150427.20585-5-alejandro.j.jimenez@oracle.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin (cherry picked from commit 108e10ff69099c3ebe147f505246be7c2ad2a499) Signed-off-by: Michael Tokarev --- hw/i386/amd_iommu.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index 60af62bef6..ef8dc726f1 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -68,34 +68,34 @@ #define AMDVI_MMIO_SIZE 0x4000 -#define AMDVI_MMIO_DEVTAB_SIZE_MASK ((1ULL << 12) - 1) -#define AMDVI_MMIO_DEVTAB_BASE_MASK (((1ULL << 52) - 1) & ~ \ - AMDVI_MMIO_DEVTAB_SIZE_MASK) +#define AMDVI_MMIO_DEVTAB_SIZE_MASK GENMASK64(8, 0) +#define AMDVI_MMIO_DEVTAB_BASE_MASK GENMASK64(51, 12) + #define AMDVI_MMIO_DEVTAB_ENTRY_SIZE 32 #define AMDVI_MMIO_DEVTAB_SIZE_UNIT 4096 /* some of this are similar but just for readability */ #define AMDVI_MMIO_CMDBUF_SIZE_BYTE (AMDVI_MMIO_COMMAND_BASE + 7) #define AMDVI_MMIO_CMDBUF_SIZE_MASK 0x0f -#define AMDVI_MMIO_CMDBUF_BASE_MASK AMDVI_MMIO_DEVTAB_BASE_MASK -#define AMDVI_MMIO_CMDBUF_HEAD_MASK (((1ULL << 19) - 1) & ~0x0f) -#define AMDVI_MMIO_CMDBUF_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK +#define AMDVI_MMIO_CMDBUF_BASE_MASK GENMASK64(51, 12) +#define AMDVI_MMIO_CMDBUF_HEAD_MASK GENMASK64(18, 4) +#define AMDVI_MMIO_CMDBUF_TAIL_MASK GENMASK64(18, 4) 
#define AMDVI_MMIO_EVTLOG_SIZE_BYTE (AMDVI_MMIO_EVENT_BASE + 7) -#define AMDVI_MMIO_EVTLOG_SIZE_MASK AMDVI_MMIO_CMDBUF_SIZE_MASK -#define AMDVI_MMIO_EVTLOG_BASE_MASK AMDVI_MMIO_CMDBUF_BASE_MASK -#define AMDVI_MMIO_EVTLOG_HEAD_MASK (((1ULL << 19) - 1) & ~0x0f) -#define AMDVI_MMIO_EVTLOG_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK +#define AMDVI_MMIO_EVTLOG_SIZE_MASK 0x0f +#define AMDVI_MMIO_EVTLOG_BASE_MASK GENMASK64(51, 12) +#define AMDVI_MMIO_EVTLOG_HEAD_MASK GENMASK64(18, 4) +#define AMDVI_MMIO_EVTLOG_TAIL_MASK GENMASK64(18, 4) -#define AMDVI_MMIO_PPRLOG_SIZE_BYTE (AMDVI_MMIO_EVENT_BASE + 7) -#define AMDVI_MMIO_PPRLOG_HEAD_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK -#define AMDVI_MMIO_PPRLOG_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK -#define AMDVI_MMIO_PPRLOG_BASE_MASK AMDVI_MMIO_EVTLOG_BASE_MASK -#define AMDVI_MMIO_PPRLOG_SIZE_MASK AMDVI_MMIO_EVTLOG_SIZE_MASK +#define AMDVI_MMIO_PPRLOG_SIZE_BYTE (AMDVI_MMIO_PPR_BASE + 7) +#define AMDVI_MMIO_PPRLOG_SIZE_MASK 0x0f +#define AMDVI_MMIO_PPRLOG_BASE_MASK GENMASK64(51, 12) +#define AMDVI_MMIO_PPRLOG_HEAD_MASK GENMASK64(18, 4) +#define AMDVI_MMIO_PPRLOG_TAIL_MASK GENMASK64(18, 4) #define AMDVI_MMIO_EXCL_ENABLED_MASK (1ULL << 0) #define AMDVI_MMIO_EXCL_ALLOW_MASK (1ULL << 1) -#define AMDVI_MMIO_EXCL_LIMIT_MASK AMDVI_MMIO_DEVTAB_BASE_MASK +#define AMDVI_MMIO_EXCL_LIMIT_MASK GENMASK64(51, 12) #define AMDVI_MMIO_EXCL_LIMIT_LOW 0xfff /* mmio control register flags */ @@ -132,14 +132,14 @@ #define AMDVI_DEV_TRANSLATION_VALID (1ULL << 1) #define AMDVI_DEV_MODE_MASK 0x7 #define AMDVI_DEV_MODE_RSHIFT 9 -#define AMDVI_DEV_PT_ROOT_MASK 0xffffffffff000 +#define AMDVI_DEV_PT_ROOT_MASK GENMASK64(51, 12) #define AMDVI_DEV_PT_ROOT_RSHIFT 12 #define AMDVI_DEV_PERM_SHIFT 61 #define AMDVI_DEV_PERM_READ (1ULL << 61) #define AMDVI_DEV_PERM_WRITE (1ULL << 62) /* Device table entry bits 64:127 */ -#define AMDVI_DEV_DOMID_ID_MASK ((1ULL << 16) - 1) +#define AMDVI_DEV_DOMID_ID_MASK GENMASK64(15, 0) /* Event codes and flags, as stored in the info field */ #define 
AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY (0x1U << 12) @@ -197,7 +197,7 @@ #define AMDVI_PAGE_SIZE (1ULL << AMDVI_PAGE_SHIFT) #define AMDVI_PAGE_SHIFT_4K 12 -#define AMDVI_PAGE_MASK_4K (~((1ULL << AMDVI_PAGE_SHIFT_4K) - 1)) +#define AMDVI_PAGE_MASK_4K GENMASK64(63, 12) #define AMDVI_MAX_GVA_ADDR (2UL << 5) #define AMDVI_MAX_PH_ADDR (40UL << 8) From 18e70a46c3171feac4f80a6a3d4748a519160616 Mon Sep 17 00:00:00 2001 From: Alejandro Jimenez Date: Tue, 17 Jun 2025 15:04:24 +0000 Subject: [PATCH 118/136] amd_iommu: Fix mask to retrieve Interrupt Table Root Pointer from DTE Fix an off-by-one error in the definition of AMDVI_IR_PHYS_ADDR_MASK. The current definition masks off the most significant bit of the Interrupt Table Root ptr i.e. it only generates a mask with bits [50:6] set. See the AMD I/O Virtualization Technology (IOMMU) Specification for the Interrupt Table Root Pointer[51:6] field in the Device Table Entry format. Cc: qemu-stable@nongnu.org Fixes: b44159fe0078 ("x86_iommu/amd: Add interrupt remap support when VAPIC is not enabled") Signed-off-by: Alejandro Jimenez Reviewed-by: Vasant Hegde Message-Id: <20250617150427.20585-6-alejandro.j.jimenez@oracle.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin (cherry picked from commit 123cf4bdd378f746dfa2f5415ba084148dded3e3) Signed-off-by: Michael Tokarev --- hw/i386/amd_iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index ef8dc726f1..2e390f808d 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -231,7 +231,7 @@ #define AMDVI_IR_INTCTL_PASS 1 #define AMDVI_IR_INTCTL_REMAP 2 -#define AMDVI_IR_PHYS_ADDR_MASK (((1ULL << 45) - 1) << 6) +#define AMDVI_IR_PHYS_ADDR_MASK GENMASK64(51, 6) /* MSI data 10:0 bits (section 2.2.5.1 Fig 14) */ #define AMDVI_IRTE_OFFSET 0x7ff From 90c635c926c13187aa13575af489dd76de737341 Mon Sep 17 00:00:00 2001 From: Alejandro Jimenez Date: Tue, 17 Jun 2025 15:04:25 +0000 Subject: [PATCH 119/136] amd_iommu: Fix the calculation for Device Table size Correctly calculate the Device Table size using the format encoded in the Device Table Base Address Register (MMIO Offset 0000h). Cc: qemu-stable@nongnu.org Fixes: d29a09ca6842 ("hw/i386: Introduce AMD IOMMU") Signed-off-by: Alejandro Jimenez Reviewed-by: Vasant Hegde Message-Id: <20250617150427.20585-7-alejandro.j.jimenez@oracle.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin (cherry picked from commit 67d3077ee403472d45794399e97c9f329242fce9) Signed-off-by: Michael Tokarev --- hw/i386/amd_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 69d69a9b2d..06df344575 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -665,8 +665,8 @@ static inline void amdvi_handle_devtab_write(AMDVIState *s) uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE); s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK); - /* set device table length */ - s->devtab_len = ((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1 * + /* set device table length (i.e. 
number of entries table can hold) */ + s->devtab_len = (((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1) * (AMDVI_MMIO_DEVTAB_SIZE_UNIT / AMDVI_MMIO_DEVTAB_ENTRY_SIZE)); } From fc1ad5124f5407437d0720fc51db6d88013add1a Mon Sep 17 00:00:00 2001 From: Alejandro Jimenez Date: Tue, 17 Jun 2025 15:04:26 +0000 Subject: [PATCH 120/136] amd_iommu: Remove duplicated definitions No functional change. Signed-off-by: Alejandro Jimenez Reviewed-by: Vasant Hegde Message-Id: <20250617150427.20585-8-alejandro.j.jimenez@oracle.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin (cherry picked from commit 5959b641c98b5ae9677e2c1d89902dac31b344d9) Signed-off-by: Michael Tokarev --- hw/i386/amd_iommu.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index 2e390f808d..04e3ad072e 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -206,10 +206,6 @@ /* Completion Wait data size */ #define AMDVI_COMPLETION_DATA_SIZE 8 -#define AMDVI_COMMAND_SIZE 16 -/* Completion Wait data size */ -#define AMDVI_COMPLETION_DATA_SIZE 8 - #define AMDVI_COMMAND_SIZE 16 #define AMDVI_INT_ADDR_FIRST 0xfee00000 From 787a817cd5691802d9ad69aa1bd969df615b47a9 Mon Sep 17 00:00:00 2001 From: Ethan Milon Date: Tue, 17 Jun 2025 15:04:27 +0000 Subject: [PATCH 121/136] amd_iommu: Fix truncation of oldval in amdvi_writeq The variable `oldval` was incorrectly declared as a 32-bit `uint32_t`. This could lead to truncation and incorrect behavior where the upper read-only 32 bits are significant. Fix the type of `oldval` to match the return type of `ldq_le_p()`. Cc: qemu-stable@nongnu.org Fixes: d29a09ca6842 ("hw/i386: Introduce AMD IOMMU") Signed-off-by: Ethan Milon Message-Id: <20250617150427.20585-9-alejandro.j.jimenez@oracle.com> Reviewed-by: Vasant Hegde Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin (cherry picked from commit 5788929e05e18ed5f76dc8ade4210f022c9ba5a1) Signed-off-by: Michael Tokarev --- hw/i386/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 06df344575..f773653487 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -140,7 +140,7 @@ static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val) { uint64_t romask = ldq_le_p(&s->romask[addr]); uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]); - uint32_t oldval = ldq_le_p(&s->mmior[addr]); + uint64_t oldval = ldq_le_p(&s->mmior[addr]); stq_le_p(&s->mmior[addr], ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask)); } From e50ca171e2889ef2653bbf566caf9263ed3b970a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 25 Jun 2025 10:50:19 +0200 Subject: [PATCH 122/136] file-posix: Fix aio=threads performance regression after enabling FUA For aio=threads, we're currently not implementing REQ_FUA in any useful way, but just do a separate raw_co_flush_to_disk() call. This changes behaviour compared to the old state, which used bdrv_co_flush() with its optimisations. As a quick fix, call bdrv_co_flush() again like before. Eventually, we can use pwritev2() to make use of RWF_DSYNC if available, but we'll still have to keep this code path as a fallback, so this fix is required either way. While the fix itself is a one-liner, some new graph locking annotations are needed to convince TSA that the locking is correct. 
Cc: qemu-stable@nongnu.org Fixes: 984a32f17e8d ("file-posix: Support FUA writes") Buglink: https://issues.redhat.com/browse/RHEL-96854 Reported-by: Tingting Mao Signed-off-by: Kevin Wolf Message-ID: <20250625085019.27735-1-kwolf@redhat.com> Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf (cherry picked from commit d402da1360c2240e81f0e5fc80ddbfc6238e0da8) Signed-off-by: Michael Tokarev --- block/file-posix.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index 56d1972d15..796553bafd 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -2484,9 +2484,9 @@ static inline bool raw_check_linux_aio(BDRVRawState *s) } #endif -static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, - uint64_t bytes, QEMUIOVector *qiov, int type, - int flags) +static int coroutine_fn GRAPH_RDLOCK +raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, uint64_t bytes, + QEMUIOVector *qiov, int type, int flags) { BDRVRawState *s = bs->opaque; RawPosixAIOData acb; @@ -2545,7 +2545,7 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, ret = raw_thread_pool_submit(handle_aiocb_rw, &acb); if (ret == 0 && (flags & BDRV_REQ_FUA)) { /* TODO Use pwritev2() instead if it's available */ - ret = raw_co_flush_to_disk(bs); + ret = bdrv_co_flush(bs); } goto out; /* Avoid the compiler err of unused label */ @@ -2580,16 +2580,16 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, return ret; } -static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset, - int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags) +static int coroutine_fn GRAPH_RDLOCK +raw_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ, flags); } -static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset, - 
int64_t bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags) +static int coroutine_fn GRAPH_RDLOCK +raw_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes, + QEMUIOVector *qiov, BdrvRequestFlags flags) { return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE, flags); } @@ -3525,10 +3525,11 @@ static int coroutine_fn raw_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op, #endif #if defined(CONFIG_BLKZONED) -static int coroutine_fn raw_co_zone_append(BlockDriverState *bs, - int64_t *offset, - QEMUIOVector *qiov, - BdrvRequestFlags flags) { +static int coroutine_fn GRAPH_RDLOCK +raw_co_zone_append(BlockDriverState *bs, + int64_t *offset, + QEMUIOVector *qiov, + BdrvRequestFlags flags) { assert(flags == 0); int64_t zone_size_mask = bs->bl.zone_size - 1; int64_t iov_len = 0; From 2533500b4a0fa32737c90852103593f07fceeebf Mon Sep 17 00:00:00 2001 From: Cole Robinson Date: Sun, 18 May 2025 13:54:20 -0400 Subject: [PATCH 123/136] roms: re-remove execute bit from hppa-firmware* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was fixed in c9d77526bddba0803a1fa982fb59ec98057150f9 for 9.2.0 but regressed in db34be329162cf6b06192703065e6c1010dbe3c5 in 10.0.0 When the bit is present, rpmbuild complains about missing ELF build-id Signed-off-by: Cole Robinson Reviewed-by: Daniel P. 
Berrangé Acked-by: Helge Deller Message-ID: <52d0edfbb9b2f63a866f0065a721f3a95da6f8ba.1747590860.git.crobinso@redhat.com> Signed-off-by: Philippe Mathieu-Daudé (cherry picked from commit a598090ebaeb930ce33c2df0d80d87da13be8848) Signed-off-by: Michael Tokarev --- pc-bios/hppa-firmware.img | Bin pc-bios/hppa-firmware64.img | Bin 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 pc-bios/hppa-firmware.img mode change 100755 => 100644 pc-bios/hppa-firmware64.img diff --git a/pc-bios/hppa-firmware.img b/pc-bios/hppa-firmware.img old mode 100755 new mode 100644 diff --git a/pc-bios/hppa-firmware64.img b/pc-bios/hppa-firmware64.img old mode 100755 new mode 100644 From 12e88c0c6c9448ab383258f1e477c211cf407350 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 3 Jun 2025 13:13:36 +0200 Subject: [PATCH 124/136] vhost: Fix used memslot tracking when destroying a vhost device When we unplug a vhost device, we end up calling vhost_dev_cleanup() where we do a memory_listener_unregister(). This memory_listener_unregister() call will end up disconnecting the listener from the address space through listener_del_address_space(). In that process, we effectively communicate the removal of all memory regions from that listener, resulting in region_del() + commit() callbacks getting triggered. So in case of vhost, we end up calling vhost_commit() with no remaining memory slots (0). In vhost_commit() we end up overwriting the global variables used_memslots / used_shared_memslots, used for detecting the number of free memslots. With used_memslots / used_shared_memslots set to 0 by vhost_commit() during device removal, we'll later assume that the other vhost devices still have plenty of memslots left when calling vhost_get_free_memslots(). Let's fix it by simply removing the global variables and depending only on the actual per-device count. Easy to reproduce by adding two vhost-user devices to a VM and then hot-unplugging one of them. 
While at it, detect unexpected underflows in vhost_get_free_memslots() and issue a warning. Reported-by: yuanminghao Link: https://lore.kernel.org/qemu-devel/20241121060755.164310-1-yuanmh12@chinatelecom.cn/ Fixes: 2ce68e4cf5be ("vhost: add vhost_has_free_slot() interface") Cc: Igor Mammedov Cc: Michael S. Tsirkin Cc: Stefano Garzarella Signed-off-by: David Hildenbrand Message-Id: <20250603111336.1858888-1-david@redhat.com> Reviewed-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin (cherry picked from commit 9f749129e2629b19f424df106c92c5a5647e396c) Signed-off-by: Michael Tokarev --- hw/virtio/vhost.c | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 6aa72fd434..99d31cc1b4 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -47,12 +47,6 @@ static struct vhost_log *vhost_log[VHOST_BACKEND_TYPE_MAX]; static struct vhost_log *vhost_log_shm[VHOST_BACKEND_TYPE_MAX]; static QLIST_HEAD(, vhost_dev) vhost_log_devs[VHOST_BACKEND_TYPE_MAX]; -/* Memslots used by backends that support private memslots (without an fd). */ -static unsigned int used_memslots; - -/* Memslots used by backends that only support shared memslots (with an fd). 
*/ -static unsigned int used_shared_memslots; - static QLIST_HEAD(, vhost_dev) vhost_devices = QLIST_HEAD_INITIALIZER(vhost_devices); @@ -74,15 +68,15 @@ unsigned int vhost_get_free_memslots(void) QLIST_FOREACH(hdev, &vhost_devices, entry) { unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev); - unsigned int cur_free; + unsigned int cur_free = r - hdev->mem->nregions; - if (hdev->vhost_ops->vhost_backend_no_private_memslots && - hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) { - cur_free = r - used_shared_memslots; + if (unlikely(r < hdev->mem->nregions)) { + warn_report_once("used (%u) vhost backend memory slots exceed" + " the device limit (%u).", hdev->mem->nregions, r); + free = 0; } else { - cur_free = r - used_memslots; + free = MIN(free, cur_free); } - free = MIN(free, cur_free); } return free; } @@ -666,13 +660,6 @@ static void vhost_commit(MemoryListener *listener) dev->mem = g_realloc(dev->mem, regions_size); dev->mem->nregions = dev->n_mem_sections; - if (dev->vhost_ops->vhost_backend_no_private_memslots && - dev->vhost_ops->vhost_backend_no_private_memslots(dev)) { - used_shared_memslots = dev->mem->nregions; - } else { - used_memslots = dev->mem->nregions; - } - for (i = 0; i < dev->n_mem_sections; i++) { struct vhost_memory_region *cur_vmr = dev->mem->regions + i; struct MemoryRegionSection *mrs = dev->mem_sections + i; @@ -1619,15 +1606,11 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, QLIST_INSERT_HEAD(&vhost_devices, hdev, entry); /* - * The listener we registered properly updated the corresponding counter. - * So we can trust that these values are accurate. + * The listener we registered properly setup the number of required + * memslots in vhost_commit(). 
*/ - if (hdev->vhost_ops->vhost_backend_no_private_memslots && - hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) { - used = used_shared_memslots; - } else { - used = used_memslots; - } + used = hdev->mem->nregions; + /* * We assume that all reserved memslots actually require a real memslot * in our vhost backend. This might not be true, for example, if the From 2df7954daa523748e1067713d52a6baa334d3dc6 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Tue, 3 Jun 2025 18:18:28 +0900 Subject: [PATCH 125/136] ui/vnc: Do not copy z_stream MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vnc_worker_thread_loop() copies z_stream stored in its local VncState to the persistent VncState, and the copied one is freed with deflateEnd() later. However, deflateEnd() refuses to operate with a copied z_stream and returns Z_STREAM_ERROR, leaking the allocated memory. Avoid copying the zlib state to fix the memory leak. Fixes: bd023f953e5e ("vnc: threaded VNC server") Signed-off-by: Akihiko Odaki Reviewed-by: Marc-André Lureau Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20250603-zlib-v3-1-20b857bd8d05@rsg.ci.i.u-tokyo.ac.jp> (cherry picked from commit aef22331b5a4670f42638a5f63a26e93bf779aae) Signed-off-by: Michael Tokarev --- ui/vnc-enc-zlib.c | 30 +++++++++++++++--------------- ui/vnc.c | 13 ++++++++++--- ui/vnc.h | 2 +- 3 files changed, 26 insertions(+), 19 deletions(-) diff --git a/ui/vnc-enc-zlib.c b/ui/vnc-enc-zlib.c index 900ae5b30f..52e9193eab 100644 --- a/ui/vnc-enc-zlib.c +++ b/ui/vnc-enc-zlib.c @@ -48,21 +48,21 @@ void vnc_zlib_zfree(void *x, void *addr) static void vnc_zlib_start(VncState *vs) { - buffer_reset(&vs->zlib.zlib); + buffer_reset(&vs->zlib->zlib); // make the output buffer be the zlib buffer, so we can compress it later - vs->zlib.tmp = vs->output; - vs->output = vs->zlib.zlib; + vs->zlib->tmp = vs->output; + vs->output = vs->zlib->zlib; } static int vnc_zlib_stop(VncState *vs) { - z_streamp zstream = 
&vs->zlib.stream; + z_streamp zstream = &vs->zlib->stream; int previous_out; // switch back to normal output/zlib buffers - vs->zlib.zlib = vs->output; - vs->output = vs->zlib.tmp; + vs->zlib->zlib = vs->output; + vs->output = vs->zlib->tmp; // compress the zlib buffer @@ -85,24 +85,24 @@ static int vnc_zlib_stop(VncState *vs) return -1; } - vs->zlib.level = vs->tight->compression; + vs->zlib->level = vs->tight->compression; zstream->opaque = vs; } - if (vs->tight->compression != vs->zlib.level) { + if (vs->tight->compression != vs->zlib->level) { if (deflateParams(zstream, vs->tight->compression, Z_DEFAULT_STRATEGY) != Z_OK) { return -1; } - vs->zlib.level = vs->tight->compression; + vs->zlib->level = vs->tight->compression; } // reserve memory in output buffer - buffer_reserve(&vs->output, vs->zlib.zlib.offset + 64); + buffer_reserve(&vs->output, vs->zlib->zlib.offset + 64); // set pointers - zstream->next_in = vs->zlib.zlib.buffer; - zstream->avail_in = vs->zlib.zlib.offset; + zstream->next_in = vs->zlib->zlib.buffer; + zstream->avail_in = vs->zlib->zlib.offset; zstream->next_out = vs->output.buffer + vs->output.offset; zstream->avail_out = vs->output.capacity - vs->output.offset; previous_out = zstream->avail_out; @@ -147,8 +147,8 @@ int vnc_zlib_send_framebuffer_update(VncState *vs, int x, int y, int w, int h) void vnc_zlib_clear(VncState *vs) { - if (vs->zlib.stream.opaque) { - deflateEnd(&vs->zlib.stream); + if (vs->zlib->stream.opaque) { + deflateEnd(&vs->zlib->stream); } - buffer_free(&vs->zlib.zlib); + buffer_free(&vs->zlib->zlib); } diff --git a/ui/vnc.c b/ui/vnc.c index c96bd8ceea..ca02ff872a 100644 --- a/ui/vnc.c +++ b/ui/vnc.c @@ -56,6 +56,11 @@ #include "io/dns-resolver.h" #include "monitor/monitor.h" +typedef struct VncConnection { + VncState vs; + VncZlib zlib; +} VncConnection; + #define VNC_REFRESH_INTERVAL_BASE GUI_REFRESH_INTERVAL_DEFAULT #define VNC_REFRESH_INTERVAL_INC 50 #define VNC_REFRESH_INTERVAL_MAX GUI_REFRESH_INTERVAL_IDLE @@ -1364,7 
+1369,7 @@ void vnc_disconnect_finish(VncState *vs) vs->magic = 0; g_free(vs->zrle); g_free(vs->tight); - g_free(vs); + g_free(container_of(vs, VncConnection, vs)); } size_t vnc_client_io_error(VncState *vs, ssize_t ret, Error *err) @@ -3243,11 +3248,13 @@ static void vnc_refresh(DisplayChangeListener *dcl) static void vnc_connect(VncDisplay *vd, QIOChannelSocket *sioc, bool skipauth, bool websocket) { - VncState *vs = g_new0(VncState, 1); + VncConnection *vc = g_new0(VncConnection, 1); + VncState *vs = &vc->vs; bool first_client = QTAILQ_EMPTY(&vd->clients); int i; trace_vnc_client_connect(vs, sioc); + vs->zlib = &vc->zlib; vs->zrle = g_new0(VncZrle, 1); vs->tight = g_new0(VncTight, 1); vs->magic = VNC_MAGIC; @@ -3270,7 +3277,7 @@ static void vnc_connect(VncDisplay *vd, QIOChannelSocket *sioc, #ifdef CONFIG_PNG buffer_init(&vs->tight->png, "vnc-tight-png/%p", sioc); #endif - buffer_init(&vs->zlib.zlib, "vnc-zlib/%p", sioc); + buffer_init(&vc->zlib.zlib, "vnc-zlib/%p", sioc); buffer_init(&vs->zrle->zrle, "vnc-zrle/%p", sioc); buffer_init(&vs->zrle->fb, "vnc-zrle-fb/%p", sioc); buffer_init(&vs->zrle->zlib, "vnc-zrle-zlib/%p", sioc); diff --git a/ui/vnc.h b/ui/vnc.h index 02613aa63a..82b883bb69 100644 --- a/ui/vnc.h +++ b/ui/vnc.h @@ -340,7 +340,7 @@ struct VncState * update vnc_async_encoding_start() */ VncTight *tight; - VncZlib zlib; + VncZlib *zlib; VncHextile hextile; VncZrle *zrle; VncZywrle zywrle; From d0975531586742ec2eff8796b7ba93bc4858e63d Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Thu, 17 Jul 2025 06:23:26 +0300 Subject: [PATCH 126/136] i386/cpu: Fix cpu number overflow in CPUID.01H.EBX[23:16] The legacy topology enumerated by CPUID.1.EBX[23:16] is defined in SDM Vol2: Bits 23-16: Maximum number of addressable IDs for logical processors in this physical package. When threads_per_socket > 255, it will 1) overwrite bits[31:24] which is apic_id, 2) bits [23:16] get truncated. 
Specifically, if launching the VM with -smp 256, the value written to EBX[23:16] is 0 because of data overflow. If the guest only supports legacy topology, without V2 Extended Topology enumerated by CPUID.0x1f or Extended Topology enumerated by CPUID.0x0b to support over 255 CPUs, the return of the kernel invoking cpu_smt_allowed() is false and APs (application processors) will fail to bring up. Then only CPU 0 is online, and others are offline. For example, launch VM via: qemu-system-x86_64 -M q35,accel=kvm,kernel-irqchip=split \ -cpu qemu64,cpuid-0xb=off -smp 256 -m 32G \ -drive file=guest.img,if=none,id=virtio-disk0,format=raw \ -device virtio-blk-pci,drive=virtio-disk0,bootindex=1 --nographic The guest shows: CPU(s): 256 On-line CPU(s) list: 0 Off-line CPU(s) list: 1-255 To avoid this issue caused by overflow, limit the max value written to EBX[23:16] to 255 as the HW does. Cc: qemu-stable@nongnu.org Reviewed-by: Xiaoyao Li Signed-off-by: Qian Wen Signed-off-by: Zhao Liu Link: https://lore.kernel.org/r/20250714080859.1960104-6-zhao1.liu@intel.com Signed-off-by: Paolo Bonzini (cherry picked from commit a62fef58299562aae6667b8d8552247423e886b3) (Mjt: fixup for 10.0.x series due to missing v10.0.0-2217-gf985a1195b "i386/cpu: Fix number of addressable IDs field for CPUID.01H.EBX[23:16]") Signed-off-by: Michael Tokarev --- target/i386/cpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 2c9517f56d..4603b2b98b 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -6828,7 +6828,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } *edx = env->features[FEAT_1_EDX]; if (threads_per_pkg > 1) { - *ebx |= threads_per_pkg << 16; + /* Fixup overflow: max value for bits 23-16 is 255. 
*/ + *ebx |= MIN(threads_per_pkg, 255) << 16; } if (!cpu->enable_pmu) { *ecx &= ~CPUID_EXT_PDCM; From 18224789994e0276a626a4e4e5554695857bd998 Mon Sep 17 00:00:00 2001 From: Qian Wen Date: Mon, 14 Jul 2025 16:08:58 +0800 Subject: [PATCH 127/136] i386/cpu: Fix overflow of cache topology fields in CPUID.04H According to SDM, CPUID.0x4:EAX[31:26] indicates the maximum number of addressable IDs for processor cores in the physical package. If we launch a VM with more than 64 cores, the 6-bit field will overflow, and the wrong core_id number will be reported. Since the HW reports 0x3f when the Intel processor has over 64 cores, limit the max value written to EAX[31:26] to 63, so max num_cores should be 64. For EAX[25:14], though at present Q35 supports up to 4096 CPUs, by constructing a specific topology, the width of the APIC ID can be extended beyond 12 bits. For example, using `-smp threads=33,cores=9,modules=9` results in a die level offset of 6 + 4 + 4 = 14 bits, which can also cause overflow. Check and honor the maximum value for EAX[25:14] as well. In addition, for the host-cache-info case, also apply the same checks and fixes. Reviewed-by: Xiaoyao Li Signed-off-by: Qian Wen Signed-off-by: Zhao Liu Link: https://lore.kernel.org/r/20250714080859.1960104-7-zhao1.liu@intel.com Signed-off-by: Paolo Bonzini (cherry picked from commit 3e86124e7cb9b66e07fb992667865a308f16fcf2) Signed-off-by: Michael Tokarev --- target/i386/cpu.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 4603b2b98b..0480ad4ab1 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -283,11 +283,17 @@ static void encode_cache_cpuid4(CPUCacheInfo *cache, assert(cache->size == cache->line_size * cache->associativity * cache->partitions * cache->sets); + /* + * The following fields have bit-width limitations, so consider the + * maximum values to avoid overflow: + * Bits 25-14: maximum 4095. + * Bits 31-26: maximum 63. 
+ */ *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) | (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) | - (max_core_ids_in_package(topo_info) << 26) | - (max_thread_ids_for_cache(topo_info, cache->share_level) << 14); + (MIN(max_core_ids_in_package(topo_info), 63) << 26) | + (MIN(max_thread_ids_for_cache(topo_info, cache->share_level), 4095) << 14); assert(cache->line_size > 0); assert(cache->partitions > 0); @@ -6867,13 +6873,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14); *eax &= ~0xFC000000; - *eax |= max_core_ids_in_package(topo_info) << 26; + *eax |= MIN(max_core_ids_in_package(topo_info), 63) << 26; if (host_vcpus_per_cache > threads_per_pkg) { *eax &= ~0x3FFC000; /* Share the cache at package level. */ - *eax |= max_thread_ids_for_cache(topo_info, - CPU_TOPOLOGY_LEVEL_SOCKET) << 14; + *eax |= MIN(max_thread_ids_for_cache(topo_info, + CPU_TOPOLOGY_LEVEL_SOCKET), 4095) << 14; } } } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) { From 82517381c54d87985994ad597d274e3f2e18a484 Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Mon, 14 Jul 2025 16:08:59 +0800 Subject: [PATCH 128/136] i386/cpu: Honor maximum value for CPUID.8000001DH.EAX[25:14] CPUID.8000001DH:EAX[25:14] is "NumSharingCache", and the number of logical processors sharing this cache is the value of this field incremented by 1. Because of its width limitation, the maximum value currently supported is 4095. Though at present Q35 supports up to 4096 CPUs, by constructing a specific topology, the width of the APIC ID can be extended beyond 12 bits. For example, using `-smp threads=33,cores=9,modules=9` results in a die level offset of 6 + 4 + 4 = 14 bits, which can also cause overflow. Check and honor the maximum value as CPUID.04H did. 
Cc: Babu Moger Signed-off-by: Zhao Liu Link: https://lore.kernel.org/r/20250714080859.1960104-8-zhao1.liu@intel.com Signed-off-by: Paolo Bonzini (cherry picked from commit 5d21ee453ad8e3f95f75e542cb3b35c5bb7cf23a) Signed-off-by: Michael Tokarev --- target/i386/cpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 0480ad4ab1..b768d8ea33 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -496,7 +496,8 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) | (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0); - *eax |= max_thread_ids_for_cache(topo_info, cache->share_level) << 14; + /* Bits 25:14 - NumSharingCache: maximum 4095. */ + *eax |= MIN(max_thread_ids_for_cache(topo_info, cache->share_level), 4095) << 14; assert(cache->line_size > 0); assert(cache->partitions > 0); From 24778b1c7ee7aca9721ed4757b0e0df0c16390f7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 14 Jul 2025 10:19:36 +0200 Subject: [PATCH 129/136] target/i386: do not expose ARCH_CAPABILITIES on AMD CPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KVM emulates the ARCH_CAPABILITIES on x86 for both Intel and AMD cpus, although the IA32_ARCH_CAPABILITIES MSR is an Intel-specific MSR and it makes no sense to emulate it on AMD. As a consequence, VMs created on AMD with qemu -cpu host and using KVM will advertise the ARCH_CAPABILITIES feature and provide the IA32_ARCH_CAPABILITIES MSR. This can cause issues (like Windows BSOD) as the guest OS might not expect this MSR to exist on such cpus (the AMD documentation specifies that ARCH_CAPABILITIES feature and MSR are not defined on the AMD architecture). A fix was proposed in KVM code, however KVM maintainers don't want to change this behavior that exists for 6+ years and suggest changes to be done in QEMU instead. 
Therefore, hide the bit from "-cpu host": migration of -cpu host guests is only possible between identical host kernel and QEMU versions, therefore this is not a problematic breakage. If a future AMD machine does include the MSR, that would re-expose the Windows guest bug; but it would not be KVM/QEMU's problem at that point, as we'd be following a genuine physical CPU impl. Reported-by: Alexandre Chartre Suggested-by: Daniel P. Berrangé Reviewed-by: Xiaoyao Li Signed-off-by: Paolo Bonzini (cherry picked from commit d3a24134e37d57abd3e7445842cda2717f49e96d) Signed-off-by: Michael Tokarev --- target/i386/kvm/kvm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 6c749d4ee8..141694f803 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -500,8 +500,12 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, * Linux v4.17-v4.20 incorrectly return ARCH_CAPABILITIES on SVM hosts. * We can detect the bug by checking if MSR_IA32_ARCH_CAPABILITIES is * returned by KVM_GET_MSR_INDEX_LIST. + * + * But also, because Windows does not like ARCH_CAPABILITIES on AMD + * machines at all, do not show the fake ARCH_CAPABILITIES MSR that + * KVM sets up. */ - if (!has_msr_arch_capabs) { + if (!has_msr_arch_capabs || !(edx & CPUID_7_0_EDX_ARCH_CAPABILITIES)) { ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES; } } else if (function == 7 && index == 1 && reg == R_EAX) { From 9561a8c14049a851bb5b83d92707bec2d6692052 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 14 Jul 2025 17:55:20 +0100 Subject: [PATCH 130/136] hw/net/npcm_gmac.c: Send the right data for second packet in a row The transmit loop in gmac_try_send_next_packet() is constructed in a way that means it will send incorrect data if it sends more than one packet. The function assembles the outbound data in a dynamically allocated block of memory which is pointed to by tx_send_buffer.
We track the first point in this block of memory which is not yet used with the prev_buf_size offset, initially zero. We track the size of the packet we're sending with the length variable, also initially zero. As we read chunks of data out of guest memory, we write them to tx_send_buffer[prev_buf_size], and then increment both prev_buf_size and length. (We might dynamically reallocate the buffer if needed.) When we send a packet, we checksum and send length bytes, starting at tx_send_buffer, and then we reset length to 0. This gives the right data for the first packet. But we don't reset prev_buf_size. This means that if we process more descriptors with further data for the next packet, that data will continue to accumulate at offset prev_buf_size, i.e. after the data for the first packet. But when we transmit that second packet, we send length bytes from tx_send_buffer, so we will send a packet which has the length of the second packet but the data of the first one. The fix for this is to also clear prev_buf_size after the packet has been sent -- we never need the data from packet one after we've sent it, so we can write packet two's data starting at the beginning of the buffer. 
Cc: qemu-stable@nongnu.org Signed-off-by: Peter Maydell Signed-off-by: Jason Wang (cherry picked from commit 871a6e5b339f0b5e71925ec7d3f452944a1c82d3) Signed-off-by: Michael Tokarev --- hw/net/npcm_gmac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/net/npcm_gmac.c b/hw/net/npcm_gmac.c index e1fb383772..d4dba630ac 100644 --- a/hw/net/npcm_gmac.c +++ b/hw/net/npcm_gmac.c @@ -615,6 +615,7 @@ static void gmac_try_send_next_packet(NPCMGMACState *gmac) trace_npcm_gmac_packet_sent(DEVICE(gmac)->canonical_path, length); buf = tx_send_buffer; length = 0; + prev_buf_size = 0; } /* step 6 */ From 08fa61a3652e36e6966bc6a3473f7a333c4058a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Wed, 4 Jun 2025 16:47:31 +0100 Subject: [PATCH 131/136] ui: fix setting client_endian field defaults MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a VNC client sends a "set pixel format" message, the 'client_endian' field will get initialized, however, it is valid to omit this message if the client wants to use the server's native pixel format. In the latter scenario nothing is initializing the 'client_endian' field, so it remains set to 0, matching neither G_LITTLE_ENDIAN nor G_BIG_ENDIAN. This then results in pixel format conversion routines taking the wrong code paths. This problem existed before the 'client_be' flag was changed into the 'client_endian' value, but the lack of initialization meant it semantically defaulted to little endian, so only big endian systems would potentially be exposed to incorrect pixel translation. The 'virt-viewer' / 'remote-viewer' apps always send a "set pixel format" message so aren't exposed to any problems, but the classical 'vncviewer' app will show the problem easily. Fixes: 7ed96710e82c385c6cfc3d064eec7dde20f0f3fd Reported-by: Thomas Huth Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Marc-André Lureau Signed-off-by: Daniel P. 
Berrangé (cherry picked from commit 3ac6daa9e1c5d7dae2a3cd1c6a388174b462f3e8) Signed-off-by: Michael Tokarev --- ui/vnc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ui/vnc.c b/ui/vnc.c index ca02ff872a..a6bf8442d5 100644 --- a/ui/vnc.c +++ b/ui/vnc.c @@ -2336,6 +2336,7 @@ static void pixel_format_message (VncState *vs) { char pad[3] = { 0, 0, 0 }; vs->client_pf = qemu_default_pixelformat(32); + vs->client_endian = G_BYTE_ORDER; vnc_write_u8(vs, vs->client_pf.bits_per_pixel); /* bits-per-pixel */ vnc_write_u8(vs, vs->client_pf.depth); /* depth */ From 3341f5cd5c391c6b460f2f70cc694ca5ef3b2191 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 21 Jul 2025 10:07:52 +0100 Subject: [PATCH 132/136] target/arm: Correct encoding of Debug Communications Channel registers We don't implement the Debug Communications Channel (DCC), but we do attempt to provide dummy versions of its system registers so that software that tries to access them doesn't fall over. However, we got the tx/rx register definitions wrong. These should be: AArch32: DBGDTRTX p14 0 c0 c5 0 (on writes) DBGDTRRX p14 0 c0 c5 0 (on reads) AArch64: DBGDTRTX_EL0 2 3 0 5 0 (on writes) DBGDTRRX_EL0 2 3 0 5 0 (on reads) DBGDTR_EL0 2 3 0 4 0 (reads and writes) where DBGDTRTX and DBGDTRRX are effectively different names for the same 32-bit register, which has tx behaviour on writes and rx behaviour on reads. The AArch64-only DBGDTR_EL0 is a 64-bit wide register whose top and bottom halves map to the DBGDTRRX and DBGDTRTX registers. 
Currently we have just one cpreg struct, which: * calls itself DBGDTR_EL0 * uses the DBGDTRTX_EL0/DBGDTRRX_EL0 encoding * is marked as ARM_CP_STATE_BOTH but has the wrong opc1 value for AArch32 * is implemented as RAZ/WI Correct the encoding so: * we name the DBGDTRTX/DBGDTRRX register correctly * we split it into AA64 and AA32 versions so we can get the AA32 encoding right * we implement DBGDTR_EL0 at its correct encoding Cc: qemu-stable@nongnu.org Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2986 Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20250708141049.778361-1-peter.maydell@linaro.org (cherry picked from commit 655659a74a36b63e33d2dc969d3c44beb1b008b3) Signed-off-by: Michael Tokarev --- target/arm/debug_helper.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/target/arm/debug_helper.c b/target/arm/debug_helper.c index a9a619ba6b..79c0e8eaff 100644 --- a/target/arm/debug_helper.c +++ b/target/arm/debug_helper.c @@ -986,11 +986,20 @@ static const ARMCPRegInfo debug_cp_reginfo[] = { .opc0 = 2, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2, .access = PL1_RW, .accessfn = access_tdcc, .type = ARM_CP_CONST, .resetvalue = 0 }, - /* DBGDTRTX_EL0/DBGDTRRX_EL0 depend on direction */ - { .name = "DBGDTR_EL0", .state = ARM_CP_STATE_BOTH, .cp = 14, + /* Architecturally DBGDTRTX is named DBGDTRRX when used for reads */ + { .name = "DBGDTRTX_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 2, .opc1 = 3, .crn = 0, .crm = 5, .opc2 = 0, .access = PL0_RW, .accessfn = access_tdcc, .type = ARM_CP_CONST, .resetvalue = 0 }, + { .name = "DBGDTRTX", .state = ARM_CP_STATE_AA32, .cp = 14, + .opc1 = 0, .crn = 0, .crm = 5, .opc2 = 0, + .access = PL0_RW, .accessfn = access_tdcc, + .type = ARM_CP_CONST, .resetvalue = 0 }, + /* This is AArch64-only and is a combination of DBGDTRTX and DBGDTRRX */ + { .name = "DBGDTR_EL0", .state = ARM_CP_STATE_AA64, + .opc0 = 2, .opc1 = 3, .crn = 0, .crm = 4, .opc2 = 0, + .access = PL0_RW, .accessfn 
= access_tdcc, + .type = ARM_CP_CONST, .resetvalue = 0 }, /* * OSECCR_EL1 provides a mechanism for an operating system * to access the contents of EDECCR. EDECCR is not implemented though, From a5ac9803d513586eefe6c7613b3c3bcc1b9e5943 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Tue, 15 Jul 2025 00:01:39 +0800 Subject: [PATCH 133/136] hvf: arm: Emulate ICC_RPR_EL1 accesses properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit a2260983c655 ("hvf: arm: Add support for GICv3") added GICv3 support by implementing emulation for a few system registers. ICC_RPR_EL1 was defined but not plugged in the sysreg handlers (for no good reason). Fix it. Fixes: a2260983c655 ("hvf: arm: Add support for GICv3") Signed-off-by: Zenghui Yu Reviewed-by: Philippe Mathieu-Daudé Message-id: 20250714160139.10404-3-zenghui.yu@linux.dev Signed-off-by: Peter Maydell (cherry picked from commit e6da704b711d5d731e4d933ad56cbbc25ee0a825) Signed-off-by: Michael Tokarev --- target/arm/hvf/hvf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c index 01e26a9726..8a4a41d15b 100644 --- a/target/arm/hvf/hvf.c +++ b/target/arm/hvf/hvf.c @@ -1352,6 +1352,7 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint64_t *val) case SYSREG_ICC_IGRPEN0_EL1: case SYSREG_ICC_IGRPEN1_EL1: case SYSREG_ICC_PMR_EL1: + case SYSREG_ICC_RPR_EL1: case SYSREG_ICC_SGI0R_EL1: case SYSREG_ICC_SGI1R_EL1: case SYSREG_ICC_SRE_EL1: @@ -1666,6 +1667,7 @@ static int hvf_sysreg_write(CPUState *cpu, uint32_t reg, uint64_t val) case SYSREG_ICC_IGRPEN0_EL1: case SYSREG_ICC_IGRPEN1_EL1: case SYSREG_ICC_PMR_EL1: + case SYSREG_ICC_RPR_EL1: case SYSREG_ICC_SGI0R_EL1: case SYSREG_ICC_SGI1R_EL1: case SYSREG_ICC_SRE_EL1: From 66d21643c2b8f2bec969a80ccea09667c77151ee Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Tue, 22 Jul 2025 20:46:10 +0300 Subject: [PATCH 134/136] Update version for 10.0.3 release Signed-off-by: Michael Tokarev 
--- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 7e2df97fe5..6a7144d304 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -10.0.2 +10.0.3 From 0993469ab4c9fdf21fddbe15bf4479a5c8c864b6 Mon Sep 17 00:00:00 2001 From: Romain Malmain Date: Tue, 12 Aug 2025 13:52:36 +0200 Subject: [PATCH 135/136] fix typing issue --- linux-user/strace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linux-user/strace.c b/linux-user/strace.c index 3b744ccd4a..90a3718aae 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -54,7 +54,7 @@ struct flags { }; /* No 'struct flags' element should have a zero mask. */ -#define FLAG_BASIC(V, M, N) { V, M | QEMU_BUILD_BUG_ON_ZERO(!(M)), N } +#define FLAG_BASIC(V, M, N) { V, M | QEMU_BUILD_BUG_ON_ZERO(!((M) > 0 ? 1 : 0)), N } /* common flags for all architectures */ #define FLAG_GENERIC_MASK(V, M) FLAG_BASIC(V, M, #V) From 124e7a4131bbb8eb2d48775df8ae2137e5e46a1b Mon Sep 17 00:00:00 2001 From: Romain Malmain Date: Tue, 12 Aug 2025 15:06:19 +0200 Subject: [PATCH 136/136] update ci --- .github/workflows/build_and_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yaml b/.github/workflows/build_and_test.yaml index 1339d82868..41a2c1846a 100644 --- a/.github/workflows/build_and_test.yaml +++ b/.github/workflows/build_and_test.yaml @@ -16,7 +16,7 @@ concurrency: jobs: build: - runs-on: [self-hosted, qemu] + runs-on: ubuntu-24.04 container: registry.gitlab.com/qemu-project/qemu/qemu/ubuntu2204:latest steps: - uses: actions/checkout@v4