Skip to content

Commit b53dbae

Browse files
committed
WIP: add support for MSI
Basic scaffolding, running into a few issues.
1 parent a5c01c1 commit b53dbae

File tree

3 files changed

+76
-1
lines changed

3 files changed

+76
-1
lines changed

python/unblob/handlers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
cab,
77
cpio,
88
dmg,
9+
msi,
910
partclone,
1011
rar,
1112
sevenzip,
@@ -88,6 +89,7 @@
8889
arc.ARCHandler,
8990
arj.ARJHandler,
9091
cab.CABHandler,
92+
msi.MsiHandler,
9193
tar.TarUstarHandler,
9294
tar.TarUnixHandler,
9395
cpio.PortableASCIIHandler,
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""MSI Handler
2+
3+
Extraction uses 7z for now. Could migrate to a full implementation based on:
4+
5+
https://github.com/nightlark/pymsi
6+
"""
7+
8+
from typing import Optional
9+
import io
10+
11+
import pymsi
12+
from structlog import get_logger
13+
14+
from unblob.extractors import Command
15+
16+
from ...models import (
17+
File,
18+
Handler,
19+
HandlerDoc,
20+
HandlerType,
21+
HexString,
22+
Reference,
23+
ValidChunk,
24+
)
25+
26+
logger = get_logger()
27+
28+
29+
class MsiHandler(Handler):
    """Handler for Microsoft Installer (MSI) packages.

    Candidates are located by the 8-byte signature in ``PATTERNS`` and
    validated by parsing them with ``pymsi``. Extraction is delegated to the
    external ``7z`` command.
    """

    NAME = "msi"

    PATTERNS = [HexString("D0 CF 11 E0 A1 B1 1A E1")]
    EXTRACTOR = Command("7z", "x", "-p", "-y", "{inpath}", "-o{outdir}")

    DOC = HandlerDoc(
        name="MSI",
        description="Microsoft Installer (MSI) files are used for the installation, maintenance, and removal of software.",
        handler_type=HandlerType.ARCHIVE,
        vendor="Microsoft",
        references=[
            Reference(
                title="MSI File Format Documentation",
                url="https://docs.microsoft.com/en-us/windows/win32/msi/overview-of-windows-installer",
            )
        ],
        limitations=[],
    )

    def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
        """Validate the MSI candidate at ``start_offset`` and return its extent.

        Args:
            file: The file being scanned.
            start_offset: Offset in ``file`` where the signature matched.

        Returns:
            A ``ValidChunk`` running from ``start_offset`` to the absolute
            offset at which parsing stopped, or ``None`` when ``pymsi``
            cannot parse the data.
        """
        file.seek(start_offset, io.SEEK_SET)

        # TODO: pymsi wants a path or BytesIO, so the candidate is copied into
        # memory. Only the bytes from start_offset onwards are copied, so the
        # parser sees the signature at position 0 of the buffer.
        buf = io.BytesIO(file[start_offset:])

        try:
            package = pymsi.Package(buf)
            pymsi.Msi(package, True)
        except Exception:
            # The exception types pymsi raises for malformed input are not
            # known here, so everything is treated as "not a valid MSI";
            # the failure is logged instead of being dropped silently.
            logger.debug(
                "Failed to parse MSI candidate",
                start_offset=start_offset,
                exc_info=True,
            )
            return None

        # Parsing moves the buffer's pointer. The buffer starts at
        # start_offset, so its position is converted to an absolute offset.
        # NOTE(review): this assumes pymsi leaves the pointer at the end of
        # the MSI data - confirm against pymsi before relying on it.
        msi_end_offset = start_offset + buf.tell()

        return ValidChunk(
            start_offset=start_offset,
            end_offset=msi_end_offset,
        )

python/unblob/processing.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,9 @@
5454
DEFAULT_PROCESS_NUM = multiprocessing.cpu_count()
5555
DEFAULT_SKIP_MAGIC = (
5656
"BFLT",
57-
"Composite Document File V2 Document",
57+
# TODO: Need to disable this for MSI but does it need to be enabled for
58+
# other types of Composite Documents?
59+
#"Composite Document File V2 Document",
5860
"Erlang BEAM file",
5961
"GIF",
6062
"GNU message catalog",

0 commit comments

Comments
 (0)