Skip to content

Commit 54a733b

Browse files
authored
Improve vmdk extent descriptor parsing (2) (#47)
1 parent 50464c1 commit 54a733b

File tree

2 files changed

+270
-34
lines changed

2 files changed

+270
-34
lines changed

dissect/hypervisor/disk/vmdk.py

Lines changed: 90 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
1+
from __future__ import annotations
2+
13
import ctypes
24
import io
35
import logging
46
import os
7+
import re
58
import textwrap
69
import zlib
710
from bisect import bisect_right
11+
from dataclasses import dataclass
812
from functools import lru_cache
913
from pathlib import Path
1014

@@ -59,13 +63,13 @@ def __init__(self, fh):
5963
if self.descriptor.attr["parentCID"] != "ffffffff":
6064
self.parent = open_parent(path.parent, self.descriptor.attr["parentFileNameHint"])
6165

62-
for _, size, extent_type, filename in self.descriptor.extents:
63-
if extent_type in ["SPARSE", "VMFSSPARSE", "SESPARSE"]:
64-
sdisk_fh = path.with_name(filename).open("rb")
66+
for extent in self.descriptor.extents:
67+
if extent.type in ["SPARSE", "VMFSSPARSE", "SESPARSE"]:
68+
sdisk_fh = path.with_name(extent.filename).open("rb")
6569
self.disks.append(SparseDisk(sdisk_fh, parent=self.parent))
66-
elif extent_type in ["VMFS", "FLAT"]:
67-
rdisk_fh = path.with_name(filename).open("rb")
68-
self.disks.append(RawDisk(rdisk_fh, size * SECTOR_SIZE))
70+
elif extent.type in ["VMFS", "FLAT"]:
71+
rdisk_fh = path.with_name(extent.filename).open("rb")
72+
self.disks.append(RawDisk(rdisk_fh, extent.sectors * SECTOR_SIZE))
6973

7074
elif magic in (COWD_MAGIC, VMDK_MAGIC, SESPARSE_MAGIC):
7175
sparse_disk = SparseDisk(fh)
@@ -398,18 +402,69 @@ def __getattr__(self, attr):
398402
return getattr(self.hdr, attr)
399403

400404

405+
# Matches a single extent line of a VMDK descriptor file. The layout is:
#
#   <access_mode> <sectors> <type> ["<filename>" [<start_sector> [<partition_uuid> [<device_identifier>]]]]
#
# The pattern is anchored at both ends, so a line with a missing mandatory
# field or an unlisted extent type does not match at all.
#
# SESPARSE is listed explicitly because the VMDK loader dispatches on it next
# to SPARSE and VMFSSPARSE; without it those extent lines would be rejected
# before the loader ever sees them.
#
# The filename group keeps its surrounding double quotes and matches greedily
# up to the last quote on the line, which allows spaces and embedded quotes.
RE_EXTENT_DESCRIPTOR = re.compile(
    r"""
    ^
    (?P<access_mode>RW|RDONLY|NOACCESS)\s
    (?P<sectors>\d+)\s
    (?P<type>SPARSE|SESPARSE|ZERO|FLAT|VMFS|VMFSSPARSE|VMFSRDM|VMFSRAW)
    (\s(?P<filename>\".+\"))?
    (\s(?P<start_sector>\d+))?
    (\s(?P<partition_uuid>\S+))?
    (\s(?P<device_identifier>\S+))?
    $
    """,
    re.VERBOSE,
)
419+
420+
421+
@dataclass
422+
class ExtentDescriptor:
423+
raw: str
424+
access_mode: str
425+
sectors: int
426+
type: str
427+
filename: str | None = None
428+
start_sector: int | None = None
429+
partition_uuid: str | None = None
430+
device_identifier: str | None = None
431+
432+
def __post_init__(self) -> None:
433+
self.sectors = int(self.sectors)
434+
435+
if self.filename:
436+
self.filename = self.filename.strip('"')
437+
438+
if self.start_sector:
439+
self.start_sector = int(self.start_sector)
440+
441+
def __repr__(self) -> str:
442+
return f"<ExtentDescriptor {self.raw}>"
443+
444+
def __str__(self) -> str:
445+
return self.raw
446+
447+
401448
class DiskDescriptor:
402-
def __init__(self, attr, extents, disk_db, sectors, raw_config=None):
449+
def __init__(
450+
self, attr: dict, extents: list[ExtentDescriptor], disk_db: dict, sectors: int, raw_config: str | None = None
451+
):
403452
self.attr = attr
404453
self.extents = extents
405454
self.ddb = disk_db
406455
self.sectors = sectors
407456
self.raw = raw_config
408457

409458
@classmethod
410-
def parse(cls, vmdk_config):
459+
def parse(cls, vmdk_config: str) -> DiskDescriptor:
460+
"""Return :class:`DiskDescriptor` based on the provided ``vmdk_config``.
461+
462+
Resources:
463+
- https://github.com/libyal/libvmdk/blob/main/documentation/VMWare%20Virtual%20Disk%20Format%20(VMDK).asciidoc
464+
""" # noqa: E501
465+
411466
descriptor_settings = {}
412-
extents = []
467+
extents: list[ExtentDescriptor] = []
413468
disk_db = {}
414469
sectors = 0
415470

@@ -420,11 +475,15 @@ def parse(cls, vmdk_config):
420475
continue
421476

422477
if line.startswith("RW ") or line.startswith("RDONLY ") or line.startswith("NOACCESS "):
423-
access_type, size, extent_type, filename = line.split(" ", 3)
424-
filename = filename.strip('"')
425-
size = int(size)
426-
sectors += size
427-
extents.append([access_type, size, extent_type, filename])
478+
match = RE_EXTENT_DESCRIPTOR.search(line)
479+
480+
if not match:
481+
log.warning("Unexpected ExtentDescriptor format in vmdk config: %s, ignoring", line)
482+
continue
483+
484+
extent = ExtentDescriptor(raw=line, **match.groupdict())
485+
sectors += extent.sectors
486+
extents.append(extent)
428487
continue
429488

430489
setting, _, value = line.partition("=")
@@ -438,35 +497,33 @@ def parse(cls, vmdk_config):
438497

439498
return cls(descriptor_settings, extents, disk_db, sectors, vmdk_config)
440499

441-
def __str__(self):
442-
str_template = """\
443-
# Disk DescriptorFile
444-
version=1
445-
{}
500+
def __str__(self) -> str:
501+
str_template = textwrap.dedent(
502+
"""\
503+
# Disk DescriptorFile
504+
version=1
505+
{}
446506
447-
# Extent Description
448-
{}
507+
# Extent Description
508+
{}
449509
450-
# The Disk Data Base
451-
#DDB
510+
# The Disk Data Base
511+
#DDB
512+
513+
{}"""
514+
)
452515

453-
{}"""
454-
str_template = textwrap.dedent(str_template)
455516
descriptor_settings = []
456517
for setting, value in self.attr.items():
457-
if setting == "version":
458-
continue
459-
descriptor_settings.append("{}={}".format(setting, value))
518+
if setting != "version":
519+
descriptor_settings.append(f"{setting}={value}")
460520
descriptor_settings = "\n".join(descriptor_settings)
461521

462-
extents = []
463-
for access_type, size, extent_type, filename in self.extents:
464-
extents.append('{} {} {} "{}"'.format(access_type, size, extent_type, filename))
465-
extents = "\n".join(extents)
522+
extents = "\n".join(map(str, self.extents))
466523

467524
disk_db = []
468525
for setting, value in self.ddb.items():
469-
disk_db.append('{} = "{}"'.format(setting, value))
526+
disk_db.append(f'{setting} = "{value}"')
470527
disk_db = "\n".join(disk_db)
471528

472529
return str_template.format(descriptor_settings, extents, disk_db)

tests/test_vmdk.py

Lines changed: 180 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
import pytest
2+
13
from dissect.hypervisor.disk.c_vmdk import c_vmdk
2-
from dissect.hypervisor.disk.vmdk import VMDK
4+
from dissect.hypervisor.disk.vmdk import VMDK, DiskDescriptor, ExtentDescriptor
35

46

57
def test_vmdk_sesparse(sesparse_vmdk):
@@ -18,3 +20,180 @@ def test_vmdk_sesparse(sesparse_vmdk):
1820
assert header.version == 0x200000001
1921

2022
assert vmdk.read(0x1000000) == b"a" * 0x1000000
23+
24+
25+
def _extent_case(line: str, case_id: str, **fields):
    """Build a case where ``line`` must parse into exactly one extent with ``fields``."""
    return pytest.param(line, [ExtentDescriptor(raw=line, **fields)], id=case_id)


def _rejected_case(line: str, case_id: str):
    """Build a case where ``line`` is malformed and must yield no extents."""
    return pytest.param(line, [], id=case_id)


@pytest.mark.parametrize(
    "extent_description, expected_extents",
    [
        _extent_case(
            'RW 123456789 SPARSE "disk.vmdk"',
            "sparse",
            access_mode="RW",
            sectors=123456789,
            type="SPARSE",
            filename='"disk.vmdk"',
        ),
        _extent_case(
            'RW 123456789 FLAT "disk-flat.vmdk" 0',
            "flat",
            access_mode="RW",
            sectors=123456789,
            type="FLAT",
            filename='"disk-flat.vmdk"',
            start_sector=0,
        ),
        _extent_case(
            "RDONLY 0 ZERO",
            "zero",
            access_mode="RDONLY",
            sectors=0,
            type="ZERO",
        ),
        _extent_case(
            'NOACCESS 123456789 SPARSE "disk-sparse.vmdk" 123 partition-uuid device-id',
            "sparse-ids",
            access_mode="NOACCESS",
            sectors=123456789,
            type="SPARSE",
            filename='"disk-sparse.vmdk"',
            start_sector=123,
            partition_uuid="partition-uuid",
            device_identifier="device-id",
        ),
        _rejected_case("RW 1234567890", "bad-1"),
        _rejected_case('RDONLY "file.vmdk"', "bad-2"),
        _rejected_case("NOACCESS", "bad-3"),
        _extent_case(
            'RW 1234567890 SPARSE "disk with spaces.vmdk"',
            "spaces-four-parts",
            access_mode="RW",
            sectors=1234567890,
            type="SPARSE",
            filename='"disk with spaces.vmdk"',
        ),
        _extent_case(
            'RW 1234567890 SPARSE "disk with spaces.vmdk" 123',
            "spaces-five-parts",
            access_mode="RW",
            sectors=1234567890,
            type="SPARSE",
            filename='"disk with spaces.vmdk"',
            start_sector=123,
        ),
        _extent_case(
            'RW 1234567890 SPARSE "disk with spaces.vmdk" 123 part-uuid',
            "spaces-six-parts",
            access_mode="RW",
            sectors=1234567890,
            type="SPARSE",
            filename='"disk with spaces.vmdk"',
            start_sector=123,
            partition_uuid="part-uuid",
        ),
        _extent_case(
            'RW 1234567890 SPARSE "disk with spaces.vmdk" 123 part-uuid device-id',
            "spaces-seven-parts",
            access_mode="RW",
            sectors=1234567890,
            type="SPARSE",
            filename='"disk with spaces.vmdk"',
            start_sector=123,
            partition_uuid="part-uuid",
            device_identifier="device-id",
        ),
        _extent_case(
            r'RW 16777216 SPARSE "this is an example "\' diskëäô:)\\\'`\foo.vmdk" 123',
            "specials-five-parts",
            access_mode="RW",
            sectors=16777216,
            type="SPARSE",
            filename=r'"this is an example "\' diskëäô:)\\\'`\foo.vmdk"',
            start_sector=123,
        ),
        _extent_case(
            r'RW 13371337 SPARSE "🦊 🦊 🦊.vmdk"',
            "emoji-four-parts",
            access_mode="RW",
            sectors=13371337,
            type="SPARSE",
            filename='"🦊 🦊 🦊.vmdk"',
        ),
    ],
)
def test_vmdk_extent_description(extent_description: str, expected_extents: list) -> None:
    """Check that VMDK extent description lines are parsed into the expected extents.

    Malformed lines are expected to produce no extents at all.

    Resources:
        - https://github.com/libyal/libvmdk/blob/main/documentation/VMWare%20Virtual%20Disk%20Format%20(VMDK).asciidoc#22-extent-descriptions
    """  # noqa: E501

    assert DiskDescriptor.parse(extent_description).extents == expected_extents

0 commit comments

Comments
 (0)