Skip to content

Commit 3591ddd

Browse files
committed
Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* Various fixes * libdaxctl support to correctly align devdax character devices (Jingqi) * initial-all-set support for live migration (Jay) * forbid '-numa node, mem' for 5.1 and newer machine types (Igor) * x87 fixes (Joseph) * Tighten memory_region_access_valid (Michael) and fix fallout (myself) * Replay fixes (Pavel) # gpg: Signature made Fri 26 Jun 2020 14:42:17 BST # gpg: using RSA key F13338574B662389866C7682BFFBD25F78C7AE83 # gpg: issuer "[email protected]" # gpg: Good signature from "Paolo Bonzini <[email protected]>" [full] # gpg: aka "Paolo Bonzini <[email protected]>" [full] # Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1 # Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83 * remotes/bonzini/tags/for-upstream: (31 commits) i386: Mask SVM features if nested SVM is disabled ibex_uart: fix XOR-as-pow vmport: move compat properties to hw_compat_5_0 hyperv: vmbus: Remove the 2nd IRQ kvm: i386: allow TSC to differ by NTP correction bounds without TSC scaling numa: forbid '-numa node, mem' for 5.1 and newer machine types osdep: Make MIN/MAX evaluate arguments only once target/i386: Add notes for versioned CPU models target/i386: reimplement fpatan using floatx80 operations target/i386: reimplement fyl2x using floatx80 operations target/i386: reimplement fyl2xp1 using floatx80 operations target/i386: reimplement fprem, fprem1 using floatx80 operations softfloat: return low bits of quotient from floatx80_modrem softfloat: do not set denominator high bit for floatx80 remainder softfloat: do not return pseudo-denormal from floatx80 remainder softfloat: fix floatx80 remainder pseudo-denormal check for zero softfloat: merge floatx80_mod and floatx80_rem target/i386: reimplement f2xm1 using floatx80 operations xen: Actually fix build without passthrough Makefile: Install qemu-[qmp/ga]-ref.* into the directory "interop" ... Signed-off-by: Peter Maydell <[email protected]>
2 parents 87fb952 + 730319a commit 3591ddd

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+6211
-370
lines changed

Makefile

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -873,8 +873,9 @@ install-sphinxdocs: sphinxdocs
873873
install-doc: $(DOCS) install-sphinxdocs
874874
$(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)"
875875
$(INSTALL_DATA) $(MANUAL_BUILDDIR)/index.html "$(DESTDIR)$(qemu_docdir)"
876-
$(INSTALL_DATA) docs/interop/qemu-qmp-ref.html "$(DESTDIR)$(qemu_docdir)"
877-
$(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)"
876+
$(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)/interop"
877+
$(INSTALL_DATA) docs/interop/qemu-qmp-ref.html "$(DESTDIR)$(qemu_docdir)/interop"
878+
$(INSTALL_DATA) docs/interop/qemu-qmp-ref.txt "$(DESTDIR)$(qemu_docdir)/interop"
878879
ifdef CONFIG_POSIX
879880
$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1"
880881
$(INSTALL_DATA) $(MANUAL_BUILDDIR)/system/qemu.1 "$(DESTDIR)$(mandir)/man1"
@@ -892,8 +893,9 @@ ifdef CONFIG_TRACE_SYSTEMTAP
892893
endif
893894
ifneq (,$(findstring qemu-ga,$(TOOLS)))
894895
$(INSTALL_DATA) $(MANUAL_BUILDDIR)/interop/qemu-ga.8 "$(DESTDIR)$(mandir)/man8"
895-
$(INSTALL_DATA) docs/interop/qemu-ga-ref.html "$(DESTDIR)$(qemu_docdir)"
896-
$(INSTALL_DATA) docs/interop/qemu-ga-ref.txt "$(DESTDIR)$(qemu_docdir)"
896+
$(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)/interop"
897+
$(INSTALL_DATA) docs/interop/qemu-ga-ref.html "$(DESTDIR)$(qemu_docdir)/interop"
898+
$(INSTALL_DATA) docs/interop/qemu-ga-ref.txt "$(DESTDIR)$(qemu_docdir)/interop"
897899
$(INSTALL_DATA) docs/interop/qemu-ga-ref.7 "$(DESTDIR)$(mandir)/man7"
898900
endif
899901
endif

accel/kvm/kvm-all.c

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ struct KVMState
101101
bool kernel_irqchip_required;
102102
OnOffAuto kernel_irqchip_split;
103103
bool sync_mmu;
104-
bool manual_dirty_log_protect;
104+
uint64_t manual_dirty_log_protect;
105105
/* The man page (and posix) say ioctl numbers are signed int, but
106106
* they're not. Linux, glibc and *BSD all treat ioctl numbers as
107107
* unsigned, and treating them as signed here can break things */
@@ -1995,6 +1995,7 @@ static int kvm_init(MachineState *ms)
19951995
int ret;
19961996
int type = 0;
19971997
const char *kvm_type;
1998+
uint64_t dirty_log_manual_caps;
19981999

19992000
s = KVM_STATE(ms->accelerator);
20002001

@@ -2120,14 +2121,20 @@ static int kvm_init(MachineState *ms)
21202121
s->coalesced_pio = s->coalesced_mmio &&
21212122
kvm_check_extension(s, KVM_CAP_COALESCED_PIO);
21222123

2123-
s->manual_dirty_log_protect =
2124+
dirty_log_manual_caps =
21242125
kvm_check_extension(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
2125-
if (s->manual_dirty_log_protect) {
2126-
ret = kvm_vm_enable_cap(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, 0, 1);
2126+
dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
2127+
KVM_DIRTY_LOG_INITIALLY_SET);
2128+
s->manual_dirty_log_protect = dirty_log_manual_caps;
2129+
if (dirty_log_manual_caps) {
2130+
ret = kvm_vm_enable_cap(s, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, 0,
2131+
dirty_log_manual_caps);
21272132
if (ret) {
2128-
warn_report("Trying to enable KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 "
2129-
"but failed. Falling back to the legacy mode. ");
2130-
s->manual_dirty_log_protect = false;
2133+
warn_report("Trying to enable capability %"PRIu64" of "
2134+
"KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 but failed. "
2135+
"Falling back to the legacy mode. ",
2136+
dirty_log_manual_caps);
2137+
s->manual_dirty_log_protect = 0;
21312138
}
21322139
}
21332140

accel/tcg/translate-all.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2582,9 +2582,9 @@ int page_check_range(target_ulong start, target_ulong len, int flags)
25822582
/* This function should never be called with addresses outside the
25832583
guest address space. If this assert fires, it probably indicates
25842584
a missing call to h2g_valid. */
2585-
#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2586-
assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2587-
#endif
2585+
if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) {
2586+
assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2587+
}
25882588

25892589
if (len == 0) {
25902590
return 0;

configure

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,7 @@ plugins="no"
518518
fuzzing="no"
519519
rng_none="no"
520520
secret_keyring=""
521+
libdaxctl=""
521522

522523
supported_cpu="no"
523524
supported_os="no"
@@ -1626,6 +1627,10 @@ for opt do
16261627
;;
16271628
--disable-keyring) secret_keyring="no"
16281629
;;
1630+
--enable-libdaxctl) libdaxctl=yes
1631+
;;
1632+
--disable-libdaxctl) libdaxctl=no
1633+
;;
16291634
*)
16301635
echo "ERROR: unknown option $opt"
16311636
echo "Try '$0 --help' for more information"
@@ -1927,6 +1932,7 @@ disabled with --disable-FEATURE, default is enabled if available:
19271932
libpmem libpmem support
19281933
xkbcommon xkbcommon support
19291934
rng-none dummy RNG, avoid using /dev/(u)random and getrandom()
1935+
libdaxctl libdaxctl support
19301936
19311937
NOTE: The object files are built at the place where configure is launched
19321938
EOF
@@ -6360,6 +6366,24 @@ if test "$libpmem" != "no"; then
63606366
fi
63616367
fi
63626368

6369+
##########################################
6370+
# check for libdaxctl
6371+
6372+
if test "$libdaxctl" != "no"; then
6373+
if $pkg_config --atleast-version=57 "libdaxctl"; then
6374+
libdaxctl="yes"
6375+
libdaxctl_libs=$($pkg_config --libs libdaxctl)
6376+
libdaxctl_cflags=$($pkg_config --cflags libdaxctl)
6377+
libs_softmmu="$libs_softmmu $libdaxctl_libs"
6378+
QEMU_CFLAGS="$QEMU_CFLAGS $libdaxctl_cflags"
6379+
else
6380+
if test "$libdaxctl" = "yes" ; then
6381+
feature_not_found "libdaxctl" "Install libdaxctl"
6382+
fi
6383+
libdaxctl="no"
6384+
fi
6385+
fi
6386+
63636387
##########################################
63646388
# check for slirp
63656389

@@ -6967,6 +6991,7 @@ echo "parallels support $parallels"
69676991
echo "sheepdog support $sheepdog"
69686992
echo "capstone $capstone"
69696993
echo "libpmem support $libpmem"
6994+
echo "libdaxctl support $libdaxctl"
69706995
echo "libudev $libudev"
69716996
echo "default devices $default_devices"
69726997
echo "plugin support $plugins"
@@ -7800,6 +7825,10 @@ if test "$libpmem" = "yes" ; then
78007825
echo "CONFIG_LIBPMEM=y" >> $config_host_mak
78017826
fi
78027827

7828+
if test "$libdaxctl" = "yes" ; then
7829+
echo "CONFIG_LIBDAXCTL=y" >> $config_host_mak
7830+
fi
7831+
78037832
if test "$bochs" = "yes" ; then
78047833
echo "CONFIG_BOCHS=y" >> $config_host_mak
78057834
fi

cpus.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1374,6 +1374,13 @@ static int64_t tcg_get_icount_limit(void)
13741374
}
13751375
}
13761376

1377+
/*
 * Notify QEMU_CLOCK_VIRTUAL and then run its timers.
 *
 * Shared by handle_icount_deadline() (when the computed deadline is 0)
 * and prepare_icount_for_run() (when the icount budget is 0 and replay
 * has a checkpoint pending).
 */
static void notify_aio_contexts(void)
1378+
{
1379+
/* Wake up other AioContexts. */
1380+
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1381+
qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1382+
}
1383+
13771384
static void handle_icount_deadline(void)
13781385
{
13791386
assert(qemu_in_vcpu_thread());
@@ -1382,9 +1389,7 @@ static void handle_icount_deadline(void)
13821389
QEMU_TIMER_ATTR_ALL);
13831390

13841391
if (deadline == 0) {
1385-
/* Wake up other AioContexts. */
1386-
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1387-
qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1392+
notify_aio_contexts();
13881393
}
13891394
}
13901395
}
@@ -1407,6 +1412,10 @@ static void prepare_icount_for_run(CPUState *cpu)
14071412
cpu->icount_extra = cpu->icount_budget - insns_left;
14081413

14091414
replay_mutex_lock();
1415+
1416+
if (cpu->icount_budget == 0 && replay_has_checkpoint()) {
1417+
notify_aio_contexts();
1418+
}
14101419
}
14111420
}
14121421

docs/index.html.in

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
<li><a href="tools/index.html">Tools Guide</a></li>
1313
<li><a href="interop/index.html">System Emulation Management and Interoperability Guide</a></li>
1414
<li><a href="specs/index.html">System Emulation Guest Hardware Specifications</a></li>
15-
<li><a href="qemu-qmp-ref.html">QMP Reference Manual</a></li>
16-
<li><a href="qemu-ga-ref.html">Guest Agent Protocol Reference</a></li>
15+
<li><a href="interop/qemu-qmp-ref.html">QMP Reference Manual</a></li>
16+
<li><a href="interop/qemu-ga-ref.html">Guest Agent Protocol Reference</a></li>
1717
</ul>
1818
</body>
1919
</html>

docs/nvdimm.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,16 @@ address to the page size (getpagesize(2)) by default. However, some
132132
types of backends may require an alignment different than the page
133133
size. In that case, QEMU v2.12.0 and later provide 'align' option to
134134
memory-backend-file to allow users to specify the proper alignment.
135+
For device dax (e.g., /dev/dax0.0), this alignment needs to match the
136+
alignment requirement of the device dax. The NUM of 'align=NUM' option
137+
must be larger than or equal to the 'align' of device dax.
138+
We can use one of the following commands to show the 'align' of device dax.
139+
140+
ndctl list -X
141+
daxctl list -R
142+
143+
In order to get the proper 'align' of device dax, you need to install
144+
the library 'libdaxctl'.
135145

136146
For example, device dax requires 2 MB alignment, so we can use
137147
following QEMU command line options to use it (/dev/dax0.0) as the

docs/system/deprecated.rst

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -95,23 +95,6 @@ error in the future.
9595
The ``-realtime mlock=on|off`` argument has been replaced by the
9696
``-overcommit mem-lock=on|off`` argument.
9797

98-
``-numa node,mem=``\ *size* (since 4.1)
99-
'''''''''''''''''''''''''''''''''''''''
100-
101-
The parameter ``mem`` of ``-numa node`` is used to assign a part of
102-
guest RAM to a NUMA node. But when using it, it's impossible to manage specified
103-
RAM chunk on the host side (like bind it to a host node, setting bind policy, ...),
104-
so guest end-ups with the fake NUMA configuration with suboptiomal performance.
105-
However since 2014 there is an alternative way to assign RAM to a NUMA node
106-
using parameter ``memdev``, which does the same as ``mem`` and adds
107-
means to actualy manage node RAM on the host side. Use parameter ``memdev``
108-
with *memory-backend-ram* backend as an replacement for parameter ``mem``
109-
to achieve the same fake NUMA effect or a properly configured
110-
*memory-backend-file* backend to actually benefit from NUMA configuration.
111-
In future new machine versions will not accept the option but it will still
112-
work with old machine types. User can check QAPI schema to see if the legacy
113-
option is supported by looking at MachineInfo::numa-mem-supported property.
114-
11598
``-numa`` node (without memory specified) (since 4.1)
11699
'''''''''''''''''''''''''''''''''''''''''''''''''''''
117100

@@ -553,3 +536,23 @@ long starting at 1MiB, the old command::
553536
can be rewritten as::
554537

555538
qemu-nbd -t --image-opts driver=raw,offset=1M,size=100M,file.driver=qcow2,file.file.driver=file,file.file.filename=file.qcow2
539+
540+
Command line options
541+
--------------------
542+
543+
``-numa node,mem=``\ *size* (removed in 5.1)
544+
''''''''''''''''''''''''''''''''''''''''''''
545+
546+
The parameter ``mem`` of ``-numa node`` was used to assign a part of
547+
guest RAM to a NUMA node. But when using it, it's impossible to manage a specified
548+
RAM chunk on the host side (like binding it to a host node, setting a bind policy, ...),
549+
so the guest ends up with a fake NUMA configuration with suboptimal performance.
550+
However since 2014 there is an alternative way to assign RAM to a NUMA node
551+
using parameter ``memdev``, which does the same as ``mem`` and adds
552+
means to actually manage node RAM on the host side. Use parameter ``memdev``
553+
with *memory-backend-ram* backend as a replacement for parameter ``mem``
554+
to achieve the same fake NUMA effect or a properly configured
555+
*memory-backend-file* backend to actually benefit from NUMA configuration.
556+
New machine versions (since 5.1) will not accept the option but it will still
557+
work with old machine types. Users can check the QAPI schema to see if the legacy
558+
option is supported by looking at MachineInfo::numa-mem-supported property.

exec.c

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@
7777

7878
#include "monitor/monitor.h"
7979

80+
#ifdef CONFIG_LIBDAXCTL
81+
#include <daxctl/libdaxctl.h>
82+
#endif
83+
8084
//#define DEBUG_SUBPAGE
8185

8286
#if !defined(CONFIG_USER_ONLY)
@@ -1745,6 +1749,46 @@ static int64_t get_file_size(int fd)
17451749
return size;
17461750
}
17471751

1752+
/*
 * Return the alignment required by the backing file @fd.
 *
 * Only devdax character devices are inspected, and only when built on
 * Linux with CONFIG_LIBDAXCTL: the device's resolved sysfs path is
 * matched against each daxctl region, and the alignment of the first
 * region whose path is contained in it is returned.
 *
 * Returns the region alignment on a match; -1 when no alignment could
 * be determined (not a character device, no matching region,
 * daxctl_new() failure, or support not compiled in); -errno when
 * fstat() or realpath() fails.
 */
static int64_t get_file_align(int fd)
{
    int64_t align = -1;
#if defined(__linux__) && defined(CONFIG_LIBDAXCTL)
    struct stat st;

    if (fstat(fd, &st) < 0) {
        return -errno;
    }

    /* Special handling for devdax character devices */
    if (S_ISCHR(st.st_mode)) {
        g_autofree char *path = NULL;
        g_autofree char *rpath = NULL;
        struct daxctl_ctx *ctx;
        struct daxctl_region *region;
        int rc = 0;

        path = g_strdup_printf("/sys/dev/char/%d:%d",
                               major(st.st_rdev), minor(st.st_rdev));
        rpath = realpath(path, NULL);
        if (!rpath) {
            /* realpath() failed; strstr() below must not see NULL. */
            return -errno;
        }

        rc = daxctl_new(&ctx);
        if (rc) {
            return -1;
        }

        daxctl_region_foreach(ctx, region) {
            if (strstr(rpath, daxctl_region_get_path(region))) {
                align = daxctl_region_get_align(region);
                break;
            }
        }
        daxctl_unref(ctx);
    }
#endif /* defined(__linux__) && defined(CONFIG_LIBDAXCTL) */

    return align;
}
1791+
17481792
static int file_ram_open(const char *path,
17491793
const char *region_name,
17501794
bool *created,
@@ -2296,7 +2340,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
22962340
{
22972341
RAMBlock *new_block;
22982342
Error *local_err = NULL;
2299-
int64_t file_size;
2343+
int64_t file_size, file_align;
23002344

23012345
/* Just support these ram flags by now. */
23022346
assert((ram_flags & ~(RAM_SHARED | RAM_PMEM)) == 0);
@@ -2332,6 +2376,14 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
23322376
return NULL;
23332377
}
23342378

2379+
file_align = get_file_align(fd);
2380+
if (file_align > 0 && mr && file_align > mr->align) {
2381+
error_setg(errp, "backing store align 0x%" PRIx64
2382+
" is larger than 'align' option 0x%" PRIx64,
2383+
file_align, mr->align);
2384+
return NULL;
2385+
}
2386+
23352387
new_block = g_malloc0(sizeof(*new_block));
23362388
new_block->mr = mr;
23372389
new_block->used_length = size;

0 commit comments

Comments
 (0)