Skip to content

Commit a16a0a7

Browse files
authored
Add Apple Sparse Image Format (ASIF) (#63)
1 parent 2e2d0aa commit a16a0a7

File tree

7 files changed

+392
-1
lines changed

7 files changed

+392
-1
lines changed

dissect/hypervisor/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
from dissect.hypervisor.descriptor import hyperv, ovf, pvs, vbox, vmx
2-
from dissect.hypervisor.disk import hdd, qcow2, vdi, vhd, vhdx, vmdk
2+
from dissect.hypervisor.disk import asif, hdd, qcow2, vdi, vhd, vhdx, vmdk
33
from dissect.hypervisor.util import envelope, vmtar
44

55
__all__ = [
6+
"asif",
67
"envelope",
78
"hdd",
89
"hyperv",

dissect/hypervisor/disk/asif.py

Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
from __future__ import annotations
2+
3+
import plistlib
4+
from functools import cached_property, lru_cache
5+
from typing import Any, BinaryIO
6+
from uuid import UUID
7+
8+
from dissect.util.stream import AlignedStream
9+
10+
from dissect.hypervisor.disk.c_asif import c_asif
11+
from dissect.hypervisor.exceptions import InvalidSignature
12+
13+
14+
class ASIF:
    """Apple Sparse Image Format (ASIF) disk image.

    ASIF disk images are a virtual disk format introduced in macOS Tahoe. They can be used in Apple's Virtualization
    framework, as well as through Disk Utility.

    An ASIF file is fairly straightforward. There's a small header which, among some other details, contains two
    directory offsets. Each directory contains a list of tables, which in turn contain a list of data entries. Each data
    entry points to a chunk of data in the ASIF file. The chunk size is defined in the header and is typically 1 MiB.
    The chunk size is always a multiple of the block size, which is also defined in the header (typically 512 bytes).
    Each directory has a version number, and the directory with the highest version number is the active directory. This
    allows for atomic updates of the directory/table data.

    The maximum virtual disk size seems to be just under 4 PiB, with a small portion at the end reserved for metadata.
    The actual size of the virtual disk is defined in the header, as well as the maximum size the disk can grow to.

    The offset to the metadata block is typically ``(4 PiB - 1 chunk)``, meaning it's within the reserved area.
    The metadata block contains a small header and a plist. The plist should contain an ``internal metadata`` and
    ``user metadata`` dictionary. Besides a "stable uuid", it's unclear what the metadata is used for or how to set it.

    Args:
        fh: File-like object containing the ASIF image.

    Raises:
        InvalidSignature: If the image header or the metadata header has an unexpected signature.
        ValueError: If the block/chunk sizes in the header cannot describe a usable table layout.

    Resources:
        - Reversing ``diskimagescontroller``
        - https://developer.apple.com/documentation/virtualization/vzdiskimagestoragedeviceattachment/
    """

    def __init__(self, fh: BinaryIO):
        self.fh = fh

        self.header = c_asif.asif_header(fh)
        if self.header.header_signature != c_asif.ASIF_HEADER_SIGNATURE:
            raise InvalidSignature(
                f"Not a valid ASIF image (expected {c_asif.ASIF_HEADER_SIGNATURE:#x}, "
                f"got {self.header.header_signature:#x})"
            )

        self.guid = UUID(bytes=self.header.guid)
        self.block_size = self.header.block_size
        self.chunk_size = self.header.chunk_size
        self.size = self.header.sector_count * self.block_size
        self.max_size = self.header.max_sector_count * self.block_size

        self._init_layout()

        # Both directories are parsed; the one with the highest version is the active one
        self.directories = sorted(
            (Directory(self, offset) for offset in self.header.directory_offsets),
            key=lambda d: d.version,
            reverse=True,
        )
        self.active_directory = self.directories[0]

        self.metadata_header = None
        self.metadata: dict[str, Any] = {}
        if self.header.metadata_chunk:
            self._load_metadata()

    def _init_layout(self) -> None:
        """Derive the directory/table geometry from ``block_size``, ``chunk_size`` and ``max_size``.

        Raises:
            ValueError: If the sizes would lead to a division by zero further down (zero block size, a chunk smaller
                than a single block, or a chunk too small to hold any usable table entry).
        """
        if self.block_size <= 0 or self.chunk_size < self.block_size:
            raise ValueError(
                f"Invalid ASIF geometry (block size {self.block_size}, chunk size {self.chunk_size})"
            )

        # The following math is taken from the assembly with some creative variable naming
        # It's possible that some of this can be simplified or the names improved
        self._blocks_per_chunk = self.chunk_size // self.block_size

        # This check doesn't really make sense, but keep it in for now
        reserved_size = 4 * self.chunk_size
        self._num_reserved_table_entries = (
            1 if reserved_size < self._blocks_per_chunk else reserved_size // self._blocks_per_chunk
        )

        # Table entries are 8 bytes (uint64) each, so this is how many fit in one chunk
        self._max_table_entries = self.chunk_size >> 3
        self._num_table_entries = self._max_table_entries - (
            self._max_table_entries % (self._num_reserved_table_entries + 1)
        )
        self._num_reserved_directory_entries = (self._num_reserved_table_entries + self._num_table_entries) // (
            self._num_reserved_table_entries + 1
        )
        self._num_usable_entries = self._num_table_entries - self._num_reserved_directory_entries
        if self._num_usable_entries <= 0:
            raise ValueError(
                f"Invalid ASIF geometry (chunk size {self.chunk_size} leaves no usable table entries)"
            )

        # This is the size in bytes of data covered by a single table
        self._size_per_table = self._num_usable_entries * self.chunk_size

        # Number of tables needed to cover the maximum size of the disk (rounded up)
        self._num_directory_entries = (self._size_per_table + self.max_size - 1) // self._size_per_table

        # Size of the table entry array, rounded up to a whole number of blocks
        self._aligned_table_size = (
            (self.block_size + 8 * self._num_table_entries - 1) // self.block_size * self.block_size
        )

    def _load_metadata(self) -> None:
        """Read the metadata header and plist located at ``header.metadata_chunk``.

        Raises:
            InvalidSignature: If the metadata header has an unexpected signature.
        """
        # Open the file in reserved mode to read from the reserved area
        with self.open(reserved=True) as disk:
            metadata_offset = self.header.metadata_chunk * self.chunk_size
            disk.seek(metadata_offset)
            self.metadata_header = c_asif.asif_meta_header(disk)

            if self.metadata_header.header_signature != c_asif.ASIF_META_HEADER_SIGNATURE:
                raise InvalidSignature(
                    f"Invalid ASIF metadata header (expected {c_asif.ASIF_META_HEADER_SIGNATURE:#x}, "
                    f"got {self.metadata_header.header_signature:#x})"
                )

            # The plist starts right after the metadata header; surrounding NUL bytes are stripped before parsing
            disk.seek(metadata_offset + self.metadata_header.header_size)
            self.metadata = plistlib.loads(disk.read(self.metadata_header.data_size).strip(b"\x00"))

    @property
    def internal_metadata(self) -> dict[str, Any]:
        """Get internal metadata from the ASIF image.

        Returns:
            A dictionary containing the internal metadata, or an empty dictionary if there is none.
        """
        return self.metadata.get("internal metadata", {})

    @property
    def user_metadata(self) -> dict[str, Any]:
        """Get user metadata from the ASIF image.

        Returns:
            A dictionary containing the user metadata, or an empty dictionary if there is none.
        """
        return self.metadata.get("user metadata", {})

    def open(self, reserved: bool = False) -> DataStream:
        """Open a stream to read the ASIF image data.

        Args:
            reserved: Whether to allow reading into the reserved area of the ASIF image.

        Returns:
            A stream-like object that can be used to read the image data.
        """
        return DataStream(self, reserved)
139+
140+
141+
class Directory:
    """ASIF Directory.

    A directory has a version (``uint64``) followed by a list of table entries (``uint64[]``).
    The version number is used to determine the active directory, with the highest version being the active one.
    Each table entry is a chunk number and points to a table in the ASIF image.

    Args:
        asif: The ASIF image this directory belongs to.
        offset: Offset of the directory in the ASIF image.
    """

    def __init__(self, asif: ASIF, offset: int):
        self.asif = asif
        self.offset = offset

        # Only the version is read eagerly; the table entries are read on first access of ``entries``
        self.asif.fh.seek(offset)
        self.version = c_asif.uint64(self.asif.fh)

        # Wrap the bound method in a per-instance LRU cache so repeated lookups reuse the same Table objects
        self.table = lru_cache(128)(self.table)

    def __repr__(self) -> str:
        return f"<Directory offset={self.offset:#x} version={self.version}>"

    @cached_property
    def entries(self) -> list[int]:
        """List of table entries in the directory."""
        # Seek over the version
        self.asif.fh.seek(self.offset + 8)
        return c_asif.uint64[self.asif._num_directory_entries](self.asif.fh)

    def table(self, index: int) -> Table:
        """Get a table from the directory.

        Args:
            index: Index of the table in the directory.

        Returns:
            The table at the given index.

        Raises:
            IndexError: If ``index`` is negative or not smaller than the number of directory entries.
        """
        # Reject negative indices explicitly: they would otherwise wrap around through list indexing
        # and produce a table with a negative virtual offset.
        if not 0 <= index < self.asif._num_directory_entries:
            raise IndexError("Table index out of range")
        return Table(self, index)
181+
182+
183+
class Table:
    """ASIF Table.

    A table is an array of data entries (``uint64[]``). Every data entry holds a chunk number that points to a chunk
    of data in the ASIF image, and every table covers a fixed amount of data in the virtual disk.

    Of the 64 bits in a data entry, the low 55 bits are the chunk number and the high 9 bits are flags.

    .. rubric :: Encoding
    .. code-block:: c

        0b00000000 01111111 11111111 11111111 11111111 11111111 11111111 11111111  (chunk number)
        0b00111111 10000000 00000000 00000000 00000000 00000000 00000000 00000000  (reserved)
        0b01000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000  (entry dirty)
        0b10000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000  (content dirty)

    Args:
        directory: The directory this table belongs to.
        index: Index of the table in the directory.
    """

    def __init__(self, directory: Directory, index: int):
        image = directory.asif

        self.asif = image
        self.directory = directory
        self.index = index

        # The directory entry is a chunk number; scale it to a byte offset in the image file
        chunk_number = self.directory.entries[index]
        self.offset = chunk_number * image.chunk_size
        # Start of the virtual disk range covered by this table
        self.virtual_offset = index * image._size_per_table

    def __repr__(self) -> str:
        return f"<Table index={self.index} offset={self.offset:#x} virtual_offset={self.virtual_offset:#x}>"

    @cached_property
    def entries(self) -> list[int]:
        """List of data entries in the table."""
        fh = self.asif.fh
        fh.seek(self.offset)
        entry_array = c_asif.uint64[self.asif._num_table_entries]
        return entry_array(fh)
220+
221+
222+
class DataStream(AlignedStream):
    """Stream to read data from an ASIF image.

    The stream is aligned to the chunk size of the image and reads through the active directory.

    Args:
        asif: The ASIF image to read from.
        reserved: Whether to allow reading into the reserved area of the ASIF image.
    """

    def __init__(self, asif: ASIF, reserved: bool = False):
        # In reserved mode the stream spans the maximum size instead of the current virtual disk size
        stream_size = asif.max_size if reserved else asif.size
        super().__init__(stream_size, align=asif.chunk_size)
        self.asif = asif
        self.reserved = reserved
        self.directory = asif.active_directory

    def _read(self, offset: int, length: int) -> bytes:
        image = self.asif
        fh = image.fh
        block_size = image.block_size
        chunk_size = image.chunk_size
        blocks_per_chunk = image._blocks_per_chunk
        reserved_entries = image._num_reserved_table_entries

        pieces = []
        position = offset
        remaining = length
        while remaining:
            table = self.directory.table(position // image._size_per_table)

            # Block and chunk index relative to the start of the range covered by this table
            block_in_table = (position // block_size) - (table.virtual_offset // block_size)
            chunk_in_table = block_in_table // blocks_per_chunk

            # Equivalent to ``chunk_in_table + chunk_in_table // reserved_entries``: the entry index advances
            # by one extra slot for every ``reserved_entries`` chunks
            entry_index = (chunk_in_table + chunk_in_table * reserved_entries) // reserved_entries

            # 0x8000000000000000 = content dirty bit
            # 0x4000000000000000 = entry dirty bit
            # 0x3F80000000000000 = reserved bits
            chunk = table.entries[entry_index] & 0x7FFFFFFFFFFFFF

            take = min(remaining, chunk_size)
            if chunk != 0:
                fh.seek(chunk * chunk_size)
                pieces.append(fh.read(take))
            else:
                # A chunk number of zero is returned as zeroes
                pieces.append(b"\x00" * take)

            position += take
            remaining -= take

        return b"".join(pieces)

dissect/hypervisor/disk/c_asif.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from __future__ import annotations
2+
3+
from dissect.cstruct import cstruct
4+
5+
# C-style definitions of the ASIF structures, loaded by dissect.cstruct below.
# ``asif_header`` is parsed from the start of the image and ``asif_meta_header`` from the start of the
# metadata block. Fields prefixed with ``unk_`` have no known meaning.
asif_def = """
#define ASIF_HEADER_SIGNATURE       0x73686477      // 'shdw'
#define ASIF_META_HEADER_SIGNATURE  0x6D657461      // 'meta'

struct asif_header {
    uint32  header_signature;
    uint32  header_version;
    uint32  header_size;
    uint32  header_flags;
    uint64  directory_offsets[2];
    char    guid[16];
    uint64  sector_count;
    uint64  max_sector_count;
    uint32  chunk_size;
    uint16  block_size;
    uint16  total_segments;
    uint64  metadata_chunk;
    char    unk_50[16];
    uint32  read_only_flags;
    uint32  metadata_flags;
    uint32  metadata_read_only_flags;
};

struct asif_meta_header {
    uint32  header_signature;
    uint32  header_version;
    uint32  header_size;
    uint64  data_size;
    uint64  unk_14;
};
"""

# All multi-byte fields are parsed as big-endian (``endian=">"``)
c_asif = cstruct(endian=">").load(asif_def)

dissect/hypervisor/disk/c_asif.pyi

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Generated by cstruct-stubgen
2+
from typing import BinaryIO, Literal, overload
3+
4+
import dissect.cstruct as __cs__
5+
from typing_extensions import TypeAlias
6+
7+
# Type stub mirroring the definitions loaded in ``c_asif.py``. This file is generated by cstruct-stubgen,
# so prefer regenerating it over editing it by hand.
class _c_asif(__cs__.cstruct):
    # Integer values of the ``#define`` constants: 0x73686477 and 0x6D657461
    ASIF_HEADER_SIGNATURE: Literal[1936221303] = ...
    ASIF_META_HEADER_SIGNATURE: Literal[1835365473] = ...
    class asif_header(__cs__.Structure):
        header_signature: _c_asif.uint32
        header_version: _c_asif.uint32
        header_size: _c_asif.uint32
        header_flags: _c_asif.uint32
        directory_offsets: __cs__.Array[_c_asif.uint64]
        guid: __cs__.CharArray
        sector_count: _c_asif.uint64
        max_sector_count: _c_asif.uint64
        chunk_size: _c_asif.uint32
        block_size: _c_asif.uint16
        total_segments: _c_asif.uint16
        metadata_chunk: _c_asif.uint64
        unk_50: __cs__.CharArray
        read_only_flags: _c_asif.uint32
        metadata_flags: _c_asif.uint32
        metadata_read_only_flags: _c_asif.uint32
        # Overload 1: construct from individual field values
        @overload
        def __init__(
            self,
            header_signature: _c_asif.uint32 | None = ...,
            header_version: _c_asif.uint32 | None = ...,
            header_size: _c_asif.uint32 | None = ...,
            header_flags: _c_asif.uint32 | None = ...,
            directory_offsets: __cs__.Array[_c_asif.uint64] | None = ...,
            guid: __cs__.CharArray | None = ...,
            sector_count: _c_asif.uint64 | None = ...,
            max_sector_count: _c_asif.uint64 | None = ...,
            chunk_size: _c_asif.uint32 | None = ...,
            block_size: _c_asif.uint16 | None = ...,
            total_segments: _c_asif.uint16 | None = ...,
            metadata_chunk: _c_asif.uint64 | None = ...,
            unk_50: __cs__.CharArray | None = ...,
            read_only_flags: _c_asif.uint32 | None = ...,
            metadata_flags: _c_asif.uint32 | None = ...,
            metadata_read_only_flags: _c_asif.uint32 | None = ...,
        ): ...
        # Overload 2: parse from a bytes-like object or a binary file-like object
        @overload
        def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ...

    class asif_meta_header(__cs__.Structure):
        header_signature: _c_asif.uint32
        header_version: _c_asif.uint32
        header_size: _c_asif.uint32
        data_size: _c_asif.uint64
        unk_14: _c_asif.uint64
        # Overload 1: construct from individual field values
        @overload
        def __init__(
            self,
            header_signature: _c_asif.uint32 | None = ...,
            header_version: _c_asif.uint32 | None = ...,
            header_size: _c_asif.uint32 | None = ...,
            data_size: _c_asif.uint64 | None = ...,
            unk_14: _c_asif.uint64 | None = ...,
        ): ...
        # Overload 2: parse from a bytes-like object or a binary file-like object
        @overload
        def __init__(self, fh: bytes | memoryview | bytearray | BinaryIO, /): ...

# Technically `c_asif` is an instance of `_c_asif`, but then we can't use it in type hints
c_asif: TypeAlias = _c_asif
294 KB
Binary file not shown.

tests/conftest.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,11 @@ def snapshot_qcow2() -> Iterator[BinaryIO]:
108108
yield from open_file_gz("_data/disk/qcow2/snapshot.qcow2.gz")
109109

110110

111+
@pytest.fixture
def basic_asif() -> Iterator[BinaryIO]:
    """Yield what ``open_file_gz`` produces for the ``_data/disk/asif/basic.asif.gz`` test image."""
    yield from open_file_gz("_data/disk/asif/basic.asif.gz")
114+
115+
111116
@pytest.fixture
def envelope() -> Iterator[BinaryIO]:
    """Yield what ``open_file`` produces for the ``_data/util/envelope/local.tgz.ve`` test file."""
    yield from open_file("_data/util/envelope/local.tgz.ve")

0 commit comments

Comments
 (0)