Skip to content

Commit c7f018f

Browse files
committed
vng, run: support specifying / autodetecting disk emulation options
This implements support for various disk topology and I/O driver options for both `--disk` and `--blk-disk` arguments. Options are accepted as a comma-separated list after the image file path e.g., `--disk /path/to/file,format=qcow2,...`. In addition to general QEMU options (format=), I/O driver options (cache=, aio=, discard=, detect-zeroes=, queues=) and topology options (log-sec=, phy-sec=, min-io=, opt-io=, disc-gran=) a "topology=" metaoption is accepted to pass through host device queue limits. The names for these options were chosen to match `lsblk` columns rather than QEMU's own -drive/-device options, because the latter are underdocumented and non-uniform. Signed-off-by: Ivan Shapovalov <intelfx@intelfx.name>
1 parent bdda90b commit c7f018f

File tree

3 files changed

+287
-7
lines changed

3 files changed

+287
-7
lines changed

virtme/commands/run.py

Lines changed: 266 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import tempfile
2121
import termios
2222
from base64 import b64encode
23+
from collections.abc import Callable
2324
from dataclasses import dataclass
2425
from pathlib import Path
2526
from shutil import which
@@ -36,6 +37,7 @@
3637
VIRTME_SSH_HOSTNAME_CID_SEPARATORS,
3738
get_conf,
3839
scsi_device_id,
40+
strtobool,
3941
)
4042

4143
from .. import architectures, mkinitramfs, modfinder, qemu_helpers, resources, virtmods
@@ -885,21 +887,148 @@ def quote_karg(arg: str) -> str:
885887
class DiskArg:
886888
name: str
887889
path: str
890+
opts: dict[str, str]
891+
892+
_OPTS_HELP = {
893+
# meta parameters
894+
"topology": ("bool", "Forward host device topology (sector and I/O sizes)"),
895+
"iothread": ("bool", "Create a dedicated I/O thread for the disk"),
896+
# general format parameters
897+
"format": ("str", "Disk image format (raw|qcow2)"),
898+
# I/O driver parameters
899+
"cache": ("str", "Cache mode (none|writeback|writethrough|directsync|unsafe)"),
900+
"aio": ("str", "Asynchronous I/O mode (native|threads|io_uring)"),
901+
"discard": (
902+
"bool",
903+
"Pass through TRIM/UNMAP requests (true=unmap, false=ignore)",
904+
),
905+
"detect-zeroes": ("bool", "Detect all-zero writes (true=on/unmap, false=off)"),
906+
"queues": ("int", "Number of I/O queues"),
907+
# topology parameters
908+
# "alignment": ("bytes", "Block alignment offset"),
909+
"log-sec": ("bytes", "Logical (LBA) sector size (typically 512 or 4096)"),
910+
"phy-sec": ("bytes", "Physical (underlying) sector size (>=log-sec)"),
911+
"min-io": ("bytes", "Minimum I/O request size"),
912+
"opt-io": ("bytes", "Optimal I/O request size"),
913+
"rota": ("bool", "Device is rotational"),
914+
# "wzeroes": ("bytes", "Maximum WRITE ZEROES request size"),
915+
# "disc-aln": ("bytes", "TRIM/UNMAP alignment offset"),
916+
"disc-gran": ("bytes", "TRIM/UNMAP request granularity"),
917+
# "disc-max": ("bytes", "Maximum TRIM/UNMAP request size"),
918+
# "disc-zero": ("bool", "TRIM/UNMAP zeroes data"),
919+
}
920+
921+
def __post_init__(self):
922+
if self.pop_opt("topology", strtobool, False):
923+
self.opts = self.topology() | self.opts
924+
925+
def get_opt(
926+
self, name: str, parser: Callable[[str], Any] = str, default: Any = None
927+
) -> Any:
928+
opt = self.opts.get(name, None)
929+
return parser(opt) if opt is not None else default
930+
931+
def pop_opt(
932+
self, name: str, parser: Callable[[str], Any] = str, default: Any = None
933+
) -> Any:
934+
opt = self.opts.pop(name, None)
935+
return parser(opt) if opt is not None else default
936+
937+
def pop_opt_qemu(
938+
self,
939+
name: str,
940+
default: Any = None,
941+
*,
942+
parser: Callable[[str], Any] = str,
943+
dest: str | None = None,
944+
) -> str | None:
945+
opt = self.pop_opt(name, parser, default)
946+
# return DiskArg.qemu_opt(name=qemu if qemu is not None else name, value=opt)
947+
if opt is None:
948+
return None
949+
if isinstance(opt, bool):
950+
opt = "on" if opt else "off"
951+
return f"{dest if dest is not None else name}={opt}"
952+
953+
def topology(self) -> dict[str, str]:
954+
# Get the real device name (handles symlinks like /dev/mapper -> /dev/dm-X)
955+
real_path = os.path.realpath(self.path, strict=True)
956+
dev_name = os.path.basename(real_path)
957+
sys_base = Path(f"/sys/block/{dev_name}")
958+
959+
attributes = {
960+
# 'alignment': ('alignment_offset', int),
961+
"log-sec": ("queue/logical_block_size", int),
962+
"phy-sec": ("queue/physical_block_size", int),
963+
"min-io": ("queue/minimum_io_size", int),
964+
"opt-io": ("queue/optimal_io_size", int),
965+
"rota": ("queue/rotational", bool),
966+
# 'wzeroes': ('queue/write_zeroes_max_bytes', int),
967+
# 'disc-aln': ('discard_alignment', int),
968+
"disc-gran": ("queue/discard_granularity", int),
969+
# 'disc-max': ('queue/discard_max_bytes', int),
970+
# 'disc-zero': ('queue/discard_zeroes_data', bool),
971+
}
972+
973+
result = {}
974+
for key, (path, parser) in attributes.items():
975+
try:
976+
value = sys_base.joinpath(path).read_text().strip()
977+
if parser is int:
978+
parsed = parser(value)
979+
if parsed <= 0:
980+
continue
981+
result[key] = value
982+
except FileNotFoundError:
983+
pass
984+
except ValueError:
985+
pass
986+
return result
888987

889988
# Validate name=path arguments from --disk and --blk-disk
890989
@classmethod
891990
def parse(cls, func: str, arg: str) -> "DiskArg":
892-
name, sep, fn = arg.partition("=")
991+
items = arg.split(",")
992+
993+
namefile = items[0]
994+
extra = items[1:]
995+
996+
name, sep, fn = namefile.partition("=")
893997
if not (name and sep and fn):
894998
arg_fail(f"invalid argument to {func}: {arg}")
895999
if "=" in fn or "," in fn:
8961000
arg_fail(f"{func} filenames cannot contain '=' or ',': {fn}")
8971001
if "=" in name or "," in name:
8981002
arg_fail(f"{func} device names cannot contain '=' or ',': {name}")
8991003

1004+
opts = dict()
1005+
for i in extra:
1006+
key, sep, value = i.partition("=")
1007+
if not key:
1008+
arg_fail(f"invalid argument to {func}: {arg}")
1009+
if sep:
1010+
opts[key] = value
1011+
else:
1012+
opts[key] = "1"
1013+
1014+
if "help" in opts:
1015+
print(
1016+
"\n".join(
1017+
[
1018+
f"Possible {func} options:",
1019+
]
1020+
+ [
1021+
"{:<20} {}".format(f"{key}=({typ})", value)
1022+
for key, (typ, value) in DiskArg._OPTS_HELP.items()
1023+
]
1024+
)
1025+
)
1026+
sys.exit(0)
1027+
9001028
return cls(
9011029
name=name,
9021030
path=fn,
1031+
opts=opts,
9031032
)
9041033

9051034

@@ -1576,6 +1705,8 @@ def do_it() -> int:
15761705
if args.cpus:
15771706
qemuargs.extend(["-smp", args.cpus])
15781707

1708+
iothread_index = 0
1709+
15791710
if args.blk_disk:
15801711
for i, d in enumerate(args.blk_disk):
15811712
driveid = f"blk-disk{i}"
@@ -1585,14 +1716,64 @@ def do_it() -> int:
15851716
"if=none",
15861717
f"id={driveid}",
15871718
f"file={disk.path}",
1588-
"format=raw",
15891719
]
15901720
device_opts = [
15911721
arch.virtio_dev_type("blk"),
15921722
f"drive={driveid}",
15931723
f"serial={disk.name}",
15941724
]
15951725

1726+
# we need those parameters multiple times
1727+
discard = disk.pop_opt("discard", parser=strtobool, default=None)
1728+
detect_zeroes = disk.pop_opt(
1729+
"detect-zeroes", parser=strtobool, default=None
1730+
)
1731+
# we need this parameter both to transform other parameters and as itself later
1732+
# log_sec = disk.get_opt("log-sec", parser=int, default=512)
1733+
1734+
drive_opts.extend(
1735+
[
1736+
disk.pop_opt_qemu("format", "raw"),
1737+
disk.pop_opt_qemu("cache", None),
1738+
disk.pop_opt_qemu("aio", None),
1739+
f"discard={'unmap' if discard else 'ignore'}"
1740+
if discard is not None
1741+
else None,
1742+
f"detect-zeroes={('unmap' if discard else 'on') if detect_zeroes else 'off'}"
1743+
if detect_zeroes is not None
1744+
else None,
1745+
]
1746+
)
1747+
1748+
device_opts.extend(
1749+
[
1750+
f"discard={'on' if discard else 'off'}"
1751+
if discard is not None
1752+
else None,
1753+
disk.pop_opt_qemu("disc-gran", dest="discard_granularity"),
1754+
disk.pop_opt_qemu("log-sec", dest="logical_block_size"),
1755+
disk.pop_opt_qemu("phy-sec", dest="physical_block_size"),
1756+
# disk.pop_qemu("disc-max", dest="max-discard-sectors", parser=lambda arg: int(arg) / log_sec),
1757+
# disk.pop_qemu("wzeroes", dest="max-write-zeroes-sectors", parser=lambda arg: int(arg) / log_sec),
1758+
disk.pop_opt_qemu("min-io", dest="min_io_size"),
1759+
disk.pop_opt_qemu("opt-io", dest="opt_io_size"),
1760+
disk.pop_opt_qemu("queues", dest="num-queues"),
1761+
]
1762+
)
1763+
# unused
1764+
disk.opts.pop("rota", None)
1765+
1766+
if disk.pop_opt("iothread", bool, False):
1767+
iothreadid = f"iothread{iothread_index}"
1768+
iothread_index += 1
1769+
qemuargs.extend(
1770+
[
1771+
"-object",
1772+
f"iothread,id={iothreadid}",
1773+
]
1774+
)
1775+
device_opts.append(f"iothread={iothreadid}")
1776+
15961777
qemuargs.extend(
15971778
[
15981779
"-drive",
@@ -1602,41 +1783,121 @@ def do_it() -> int:
16021783
]
16031784
)
16041785

1605-
if args.disk:
1606-
qemuargs.extend(["-device", "{},id=scsi".format(arch.virtio_dev_type("scsi"))])
1786+
# any options that were not consumed are errors
1787+
if disk.opts:
1788+
raise ValueError(
1789+
f"invalid --disk parameter: {d!r}\n(keys were not consumed: {disk.opts.keys()})"
1790+
)
16071791

1792+
if args.disk:
16081793
for i, d in enumerate(args.disk):
1794+
scsiid = f"scsi{i}"
16091795
driveid = f"disk{i}"
16101796
disk = DiskArg.parse("--disk", d)
16111797

16121798
# scsi-hd.device_id= is normally defaulted to scsi-hd.serial=,
16131799
# but it must not be longer than 20 characters
16141800
device_id = scsi_device_id(disk.name, 20)
16151801

1802+
scsi_opts = [
1803+
arch.virtio_dev_type("scsi"),
1804+
f"id={scsiid}",
1805+
]
16161806
drive_opts = [
16171807
"if=none",
16181808
f"id={driveid}",
16191809
f"file={disk.path}",
1620-
"format=raw",
16211810
]
16221811
device_opts = [
16231812
"scsi-hd",
16241813
f"drive={driveid}",
1814+
f"bus={scsiid}.0",
16251815
"vendor=virtme",
16261816
"product=disk",
16271817
f"serial={disk.name}",
16281818
f"device_id={device_id}" if device_id != disk.name else None,
16291819
]
16301820

1821+
# we need those parameters multiple times
1822+
discard = disk.pop_opt("discard", parser=strtobool, default=None)
1823+
detect_zeroes = disk.pop_opt(
1824+
"detect-zeroes", parser=strtobool, default=None
1825+
)
1826+
# we need this parameter both to transform other parameters and as itself later
1827+
log_sec = disk.get_opt("log-sec")
1828+
1829+
drive_opts.extend(
1830+
[
1831+
disk.pop_opt_qemu("format", "raw"),
1832+
disk.pop_opt_qemu("cache", None),
1833+
disk.pop_opt_qemu("aio", None),
1834+
f"discard={'unmap' if discard else 'ignore'}"
1835+
if discard is not None
1836+
else None,
1837+
f"detect-zeroes={('unmap' if discard else 'on') if detect_zeroes else 'off'}"
1838+
if detect_zeroes is not None
1839+
else None,
1840+
]
1841+
)
1842+
1843+
scsi_opts.extend(
1844+
[
1845+
disk.pop_opt_qemu("queues", dest="num-queues"),
1846+
]
1847+
)
1848+
1849+
device_opts.extend(
1850+
[
1851+
disk.pop_opt_qemu("disc-gran", dest="discard_granularity"),
1852+
disk.pop_opt_qemu("log-sec", dest="logical_block_size"),
1853+
# convenience: QEMU does not automatically adjust physical_block_size
1854+
# to be not less than logical_block_size (it errors out instead), so we do it here
1855+
disk.pop_opt_qemu(
1856+
"phy-sec", dest="physical_block_size", default=log_sec
1857+
),
1858+
# disk.pop_qemu("disc-max", dest="max_unmap_size"),
1859+
# disk.pop_qemu("wzeroes", dest="???"),
1860+
disk.pop_opt_qemu("min-io", dest="min_io_size"),
1861+
disk.pop_opt_qemu("opt-io", dest="opt_io_size"),
1862+
# sic: set rotation_rate to "1" for non-rotating disks ("1" is a special value
1863+
# that means "non-rotating medium"), but set to "0" for rotating disks
1864+
# ("0" means "rotation rate not reported").
1865+
disk.pop_opt_qemu(
1866+
"rota",
1867+
dest="rotation_rate",
1868+
parser=lambda arg: "0" if strtobool(arg) else "1",
1869+
),
1870+
]
1871+
)
1872+
1873+
if disk.pop_opt("iothread", bool, False):
1874+
iothreadid = f"iothread{iothread_index}"
1875+
iothread_index += 1
1876+
qemuargs.extend(
1877+
[
1878+
"-object",
1879+
f"iothread,id={iothreadid}",
1880+
]
1881+
)
1882+
scsi_opts.append(f"iothread={iothreadid}")
1883+
16311884
qemuargs.extend(
16321885
[
16331886
"-drive",
16341887
",".join(o for o in drive_opts if o is not None),
16351888
"-device",
1889+
",".join(o for o in scsi_opts if o is not None),
1890+
"-device",
16361891
",".join(o for o in device_opts if o is not None),
16371892
]
16381893
)
16391894

1895+
# any options that were not consumed are errors
1896+
if disk.opts:
1897+
raise ValueError(
1898+
f"invalid --disk parameter: {d!r}\n(keys were not consumed: {disk.opts.keys()})"
1899+
)
1900+
16401901
ret_path = None
16411902

16421903
def cleanup_script_retcode():

virtme_ng/run.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1171,8 +1171,17 @@ def _get_virtme_disk(self, args):
11711171
disk_str = ""
11721172

11731173
def ensure_name(dsk: str) -> str:
    """
    `dsk` is a comma-separated list of disk options (KEY=VAL), with the first
    option specifying the disk name and path (NAME=PATH). As an exception,
    NAME can be omitted (but the underlying implementation does not know that).
    This function ensures that the first option has a NAME, and adds one
    equal to the PATH if it is missing.
    """
    # Only the first item decides whether a NAME is present; any '=' further
    # along belongs to the options and must not be mistaken for it.
    first, _, _ = dsk.partition(",")
    if "=" not in first:
        # Prefix the whole string so the options after PATH are preserved.
        return f"{first}={dsk}"
    return dsk
11771186

11781187
if args.disk is not None:

0 commit comments

Comments
 (0)