|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import plistlib |
| 4 | +from functools import cached_property, lru_cache |
| 5 | +from typing import Any, BinaryIO |
| 6 | +from uuid import UUID |
| 7 | + |
| 8 | +from dissect.util.stream import AlignedStream |
| 9 | + |
| 10 | +from dissect.hypervisor.disk.c_asif import c_asif |
| 11 | +from dissect.hypervisor.exceptions import InvalidSignature |
| 12 | + |
| 13 | + |
class ASIF:
    """Apple Sparse Image Format (ASIF) disk image.

    ASIF disk images are a virtual disk format introduced in macOS Tahoe. They can be used in Apple's Virtualization
    framework, as well as through Disk Utility.

    An ASIF file is pretty straightforward. There's a small header which, among some other details, contains two
    directory offsets. Each directory contains a list of tables, which in turn contain a list of data entries. Each data
    entry points to a chunk of data in the ASIF file. The chunk size is defined in the header and is typically 1 MiB.
    The chunk size is always a multiple of the block size, which is also defined in the header (typically 512 bytes).
    Each directory has a version number, and the directory with the highest version number is the active directory. This
    allows for atomic updates of the directory/table data.

    The maximum virtual disk size seems to be just under 4 PiB, with a small portion at the end reserved for metadata.
    The actual size of the virtual disk is defined in the header, as well as the maximum size the disk can grow to.

    The offset to the metadata block is typically ``(4 PiB - 1 chunk)``, meaning it's within the reserved area.
    The metadata block contains a small header and a plist. The plist should contain an ``internal metadata`` and
    ``user metadata`` dictionary. Besides a "stable uuid", it's unclear what the metadata is used for or how to set it.

    Args:
        fh: File-like object containing the ASIF image.

    Raises:
        InvalidSignature: If the image header or the metadata header does not carry the expected signature.

    Resources:
        - Reversing ``diskimagescontroller``
        - https://developer.apple.com/documentation/virtualization/vzdiskimagestoragedeviceattachment/
    """

    def __init__(self, fh: BinaryIO):
        self.fh = fh

        self.header = c_asif.asif_header(fh)
        if self.header.header_signature != c_asif.ASIF_HEADER_SIGNATURE:
            raise InvalidSignature(
                f"Not a valid ASIF image (expected {c_asif.ASIF_HEADER_SIGNATURE:#x}, "
                f"got {self.header.header_signature:#x})"
            )

        self.guid = UUID(bytes=self.header.guid)
        self.block_size = self.header.block_size
        self.chunk_size = self.header.chunk_size
        # Both sizes are stored in the header as sector counts, convert them to bytes
        self.size = self.header.sector_count * self.block_size
        self.max_size = self.header.max_sector_count * self.block_size

        # The following math is taken from the assembly with some creative variable naming
        # It's possible that some of this can be simplified or the names improved
        self._blocks_per_chunk = self.chunk_size // self.block_size

        # This check doesn't really make sense, but keep it in for now
        reserved_size = 4 * self.chunk_size
        self._num_reserved_table_entries = (
            1 if reserved_size < self._blocks_per_chunk else reserved_size // self._blocks_per_chunk
        )

        # A table occupies at most one chunk worth of 8 byte entries
        self._max_table_entries = self.chunk_size >> 3
        # Round the entry count down to a multiple of (reserved entries + 1)
        self._num_table_entries = self._max_table_entries - (
            self._max_table_entries % (self._num_reserved_table_entries + 1)
        )
        self._num_reserved_directory_entries = (self._num_reserved_table_entries + self._num_table_entries) // (
            self._num_reserved_table_entries + 1
        )
        self._num_usable_entries = self._num_table_entries - self._num_reserved_directory_entries
        # This is the size in bytes of data covered by a single table
        self._size_per_table = self._num_usable_entries * self.chunk_size

        # Number of tables needed to cover the maximum disk size (ceiling division)
        self._num_directory_entries = (self._size_per_table + self.max_size - 1) // self._size_per_table

        self._aligned_table_size = (
            (self.block_size + 8 * self._num_table_entries - 1) // self.block_size * self.block_size
        )

        # Highest version first, so the active directory ends up at index 0
        self.directories = sorted(
            (Directory(self, offset) for offset in self.header.directory_offsets),
            key=lambda d: d.version,
            reverse=True,
        )
        self.active_directory = self.directories[0]

        self.metadata_header = None
        self.metadata: dict[str, Any] = {}
        if self.header.metadata_chunk:
            # Open the file in reserved mode to read from the reserved area
            with self.open(reserved=True) as disk:
                metadata_offset = self.header.metadata_chunk * self.chunk_size
                disk.seek(metadata_offset)
                self.metadata_header = c_asif.asif_meta_header(disk)

                if self.metadata_header.header_signature != c_asif.ASIF_META_HEADER_SIGNATURE:
                    raise InvalidSignature(
                        f"Invalid ASIF metadata header (expected {c_asif.ASIF_META_HEADER_SIGNATURE:#x}, "
                        f"got {self.metadata_header.header_signature:#x})"
                    )

                # The plist payload starts right after the metadata header; NUL bytes around it are stripped
                disk.seek(metadata_offset + self.metadata_header.header_size)
                self.metadata = plistlib.loads(disk.read(self.metadata_header.data_size).strip(b"\x00"))

    @property
    def internal_metadata(self) -> dict[str, Any]:
        """Get internal metadata from the ASIF image.

        Returns:
            A dictionary containing the internal metadata, or an empty dictionary if there is none.
        """
        return self.metadata.get("internal metadata", {})

    @property
    def user_metadata(self) -> dict[str, Any]:
        """Get user metadata from the ASIF image.

        Returns:
            A dictionary containing the user metadata, or an empty dictionary if there is none.
        """
        return self.metadata.get("user metadata", {})

    def open(self, reserved: bool = False) -> DataStream:
        """Open a stream to read the ASIF image data.

        Args:
            reserved: Whether to allow reading into the reserved area of the ASIF image.

        Returns:
            A stream-like object that can be used to read the image data.
        """
        return DataStream(self, reserved)
| 139 | + |
| 140 | + |
class Directory:
    """ASIF Directory.

    A directory has a version (``uint64``) followed by a list of table entries (``uint64[]``).
    The version number is used to determine the active directory, with the highest version being the active one.
    Each table entry is a chunk number and points to a table in the ASIF image.

    Args:
        asif: The ASIF image this directory belongs to.
        offset: Offset of the directory in the ASIF image.
    """

    def __init__(self, asif: ASIF, offset: int):
        self.asif = asif
        self.offset = offset

        # The version is the first uint64 of the directory
        self.asif.fh.seek(offset)
        self.version = c_asif.uint64(self.asif.fh)

        # Wrap the bound method in a per-instance cache (up to 128 tables) so repeated lookups reuse Table objects
        self.table = lru_cache(128)(self.table)

    def __repr__(self) -> str:
        return f"<Directory offset={self.offset:#x} version={self.version}>"

    @cached_property
    def entries(self) -> list[int]:
        """List of table entries in the directory."""
        # Seek over the version
        self.asif.fh.seek(self.offset + 8)
        return c_asif.uint64[self.asif._num_directory_entries](self.asif.fh)

    def table(self, index: int) -> Table:
        """Get a table from the directory.

        Args:
            index: Index of the table in the directory.

        Returns:
            The table at the given index.

        Raises:
            IndexError: If ``index`` is negative or not smaller than the number of directory entries.
        """
        # Reject negative indices explicitly, otherwise they would silently wrap around through
        # Python's negative list indexing and produce a table with a negative virtual offset
        if not 0 <= index < self.asif._num_directory_entries:
            raise IndexError("Table index out of range")
        return Table(self, index)
| 181 | + |
| 182 | + |
class Table:
    """ASIF Table.

    A table is a list of data entries (``uint64[]``). Every data entry holds a chunk number that points to a chunk of
    data in the ASIF image, and every table covers a fixed amount of data in the virtual disk.

    Of each data entry, 55 bits are usable for the chunk number and 9 bits are reserved for flags.

    .. rubric :: Encoding
    .. code-block:: c

        0b00000000 01111111 11111111 11111111 11111111 11111111 11111111 11111111  (chunk number)
        0b00111111 10000000 00000000 00000000 00000000 00000000 00000000 00000000  (reserved)
        0b01000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000  (entry dirty)
        0b10000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000  (content dirty)

    Args:
        directory: The directory this table belongs to.
        index: Index of the table in the directory.
    """

    def __init__(self, directory: Directory, index: int):
        asif = directory.asif

        self.asif = asif
        self.directory = directory
        self.index = index

        # The directory entry is a chunk number, scale it to a byte offset in the image file
        table_chunk = directory.entries[index]
        self.offset = table_chunk * asif.chunk_size
        # Start of the virtual disk range covered by this table
        self.virtual_offset = index * asif._size_per_table

    def __repr__(self) -> str:
        return f"<Table index={self.index} offset={self.offset:#x} virtual_offset={self.virtual_offset:#x}>"

    @cached_property
    def entries(self) -> list[int]:
        """List of data entries in the table."""
        asif = self.asif
        asif.fh.seek(self.offset)
        entry_array = c_asif.uint64[asif._num_table_entries]
        return entry_array(asif.fh)
| 220 | + |
| 221 | + |
class DataStream(AlignedStream):
    """Stream to read data from an ASIF image.

    The stream is aligned on the chunk size of the image. Its size is the maximum disk size when ``reserved`` is set,
    and the actual disk size otherwise.

    Args:
        asif: The ASIF image to read from.
        reserved: Whether to allow reading into the reserved area of the ASIF image.
    """

    # Lower 55 bits of a data entry hold the chunk number, the remaining bits are flags:
    #   0x8000000000000000 = content dirty bit
    #   0x4000000000000000 = entry dirty bit
    #   0x3F80000000000000 = reserved bits
    _CHUNK_NUMBER_MASK = 0x7FFFFFFFFFFFFF

    def __init__(self, asif: ASIF, reserved: bool = False):
        stream_size = asif.max_size if reserved else asif.size
        super().__init__(stream_size, align=asif.chunk_size)
        self.asif = asif
        self.reserved = reserved
        self.directory = asif.active_directory

    def _read(self, offset: int, length: int) -> bytes:
        """Read ``length`` bytes starting at ``offset``, at most one chunk per iteration.

        Data entries with chunk number 0 are returned as zero bytes, all other chunks are read from the image file.
        """
        asif = self.asif
        fh = asif.fh
        block_size = asif.block_size
        chunk_size = asif.chunk_size
        blocks_per_chunk = asif._blocks_per_chunk
        num_reserved = asif._num_reserved_table_entries

        pieces = []
        position = offset
        remaining = length
        while remaining:
            table = self.directory.table(position // asif._size_per_table)

            # Block index relative to the start of the range covered by this table
            block_in_table = position // block_size - table.virtual_offset // block_size
            chunk_in_table = block_in_table // blocks_per_chunk
            # Equals chunk_in_table + chunk_in_table // num_reserved, i.e. one table slot is skipped
            # after every num_reserved data entries
            slot = (chunk_in_table + chunk_in_table * num_reserved) // num_reserved

            chunk = table.entries[slot] & self._CHUNK_NUMBER_MASK
            take = min(remaining, chunk_size)

            if chunk:
                fh.seek(chunk * chunk_size)
                pieces.append(fh.read(take))
            else:
                pieces.append(b"\x00" * take)

            position += take
            remaining -= take

        return b"".join(pieces)
0 commit comments