Skip to content

Commit 6e799ad

Browse files
jcrussellqkaiser
andcommitted
feat(handler): add support for PE files with extraction of NSIS executable
Add support for PE files by relying on LIEF to parse the file once it has been matched on the 'MZ' or 'PE' signature. If the file is a self-extracting NSIS executable ("Nullsoft.NSIS.exehead" present in the manifest), we extract it with 7zip. Note: the DLL files within the MSI extraction directory are no longer extracted, since the PE handler now takes care of them. This is an improvement over the RAR false positive that was previously found in the DLL. Co-authored-by: Quentin Kaiser <[email protected]>
1 parent ec71e31 commit 6e799ad

File tree

916 files changed

+2867
-19
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

916 files changed

+2867
-19
lines changed

docs/handlers.md

Lines changed: 18 additions & 0 deletions

python/unblob/handlers/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
zlib,
4040
zstd,
4141
)
42-
from .executable import elf
42+
from .executable import elf, pe
4343
from .filesystem import (
4444
cramfs,
4545
extfs,
@@ -118,6 +118,7 @@
118118
zstd.ZSTDHandler,
119119
elf.ELF32Handler,
120120
elf.ELF64Handler,
121+
pe.PEHandler,
121122
zlib.ZlibHandler,
122123
engenius.EngeniusHandler,
123124
ecc.AutelECCHandler,
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
import io
2+
import struct
3+
from pathlib import Path
4+
from typing import Optional
5+
6+
import lief
7+
from structlog import get_logger
8+
9+
from unblob.extractors.command import Command
10+
11+
from ...models import (
12+
Extractor,
13+
ExtractResult,
14+
File,
15+
Handler,
16+
HandlerDoc,
17+
HandlerType,
18+
HexString,
19+
Reference,
20+
ValidChunk,
21+
)
22+
23+
# Turn off LIEF's own logging as soon as this module is imported.
lief.logging.disable()
24+
25+
logger = get_logger()
26+
27+
28+
class PEExtractor(Extractor):
    """Extractor for PE files that only unpacks NSIS self-extracting installers."""

    def extract(self, inpath: Path, outdir: Path) -> Optional[ExtractResult]:
        """Unpack *inpath* into *outdir* with 7z when it is an NSIS installer.

        Returns whatever the 7z ``Command`` extractor returns, or ``None`` when
        LIEF cannot parse the file or the file is not recognized as NSIS.
        """
        parsed = lief.PE.parse(inpath)
        if not parsed or not self.is_nsis(parsed):
            return None

        sevenzip = Command("7z", "x", "-y", "{inpath}", "-o{outdir}")
        return sevenzip.extract(inpath, outdir)

    def is_nsis(self, binary: lief.PE.Binary) -> bool:
        """Tell whether *binary* looks like a Nullsoft Installer (NSIS) archive.

        The check mirrors the one used by file(1): the PE manifest resource
        must mention "Nullsoft.NSIS.exehead".
        see https://github.com/file/file/blob/7ed3febfcd616804a2ec6495b3e5f9ccb6fc5f8f/magic/Magdir/msdos#L383
        """
        if not binary.has_resources:
            return False

        manager = binary.resources_manager
        if not isinstance(manager, lief.PE.ResourcesManager):
            return False
        if not manager.has_manifest:
            return False

        # The manifest is handled both as text and as raw bytes; bytes are
        # decoded leniently so undecodable sequences cannot abort the check.
        raw_manifest = manager.manifest
        if isinstance(raw_manifest, str):
            manifest_text = raw_manifest
        else:
            manifest_text = raw_manifest.decode(errors="ignore")

        return "Nullsoft.NSIS.exehead" in manifest_text
55+
56+
57+
class PEHandler(Handler):
    """Handler that recognizes PE executables and sizes them with LIEF.

    Candidate offsets are found through the 'MZ' and 'PE\\0\\0' signatures.
    The chunk normally spans the size LIEF reports for the binary; when the
    overlay carries an NSIS first header, the chunk is extended using the
    archive size stored in that header.
    """

    NAME = "pe"

    PATTERNS = [
        HexString(
            """
            // MZ header
            4d 5a
            """
        ),
        HexString(
            """
            // PE header
            50 45 00 00
            """
        ),
    ]

    EXTRACTOR = PEExtractor()

    DOC = HandlerDoc(
        name="pe",
        description="The PE (Portable Executable) is a binary file format used for executable code on 32-bit and 64-bit Windows operating systems as well as in UEFI environments.",
        handler_type=HandlerType.EXECUTABLE,
        vendor="Microsoft",
        references=[
            Reference(
                title="PE Format",
                url="https://learn.microsoft.com/en-us/windows/win32/debug/pe-format",
            ),
            Reference(
                title="Portable Executable Wikipedia",
                url="https://en.wikipedia.org/wiki/Portable_Executable",
            ),
        ],
        limitations=[],
    )

    def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
        """Compute the chunk covered by the PE binary found at *start_offset*.

        Args:
            file: The file being scanned.
            start_offset: Offset in *file* where a signature matched.

        Returns:
            A ``ValidChunk`` starting at *start_offset*, or ``None`` when LIEF
            cannot parse the data or an NSIS header in the overlay is
            malformed or truncated.
        """
        file.seek(start_offset, io.SEEK_SET)

        binary = lief.PE.parse(file[start_offset:])
        if not binary:
            return None

        # Check to see if we can extract the size of the full NSIS Installer by
        # including the archive size from the NSIS header.
        if binary.overlay:
            overlay = bytes(binary.overlay)

            magic_offset = overlay.find(b"NullsoftInst")
            if magic_offset != -1:
                # Two 32-bit fields precede the "NullsoftInst" magic, and two
                # more follow it; the last one is the archive size.
                # The layout is read explicitly as little-endian ("<") so the
                # result does not depend on the byte order or alignment rules
                # of the host running the analysis.
                header_format = "<II12sII"
                header_size = struct.calcsize(header_format)

                header_start = magic_offset - 8
                if header_start < 0 or header_start + header_size > len(overlay):
                    # Malformed or truncated NSIS header: refuse the chunk
                    # instead of letting struct raise on short input.
                    return None

                *_, archive_size = struct.unpack_from(
                    header_format, overlay, header_start
                )

                # NOTE(review): header_start is not added to the end offset,
                # i.e. this presumes the NSIS header sits at the very start of
                # the overlay -- confirm against the NSIS first-header
                # definition before changing.
                return ValidChunk(
                    start_offset=start_offset,
                    end_offset=start_offset + binary.overlay_offset + archive_size,
                )

        return ValidChunk(
            start_offset=start_offset,
            end_offset=start_offset + binary.original_size,
        )

tests/integration/archive/msi/__output__/7z2501.msi.padded_extract/16-1563664.msi_extract/_7z.dll_extract/0-1074752.unknown

Lines changed: 0 additions & 3 deletions
This file was deleted.

tests/integration/archive/msi/__output__/7z2501.msi.padded_extract/16-1563664.msi_extract/_7z.dll_extract/1074752-1133791.rar

Lines changed: 0 additions & 3 deletions
This file was deleted.

tests/integration/archive/msi/__output__/7z2501.msi.padded_extract/16-1563664.msi_extract/_7z.dll_extract/1133791-1316352.unknown

Lines changed: 0 additions & 3 deletions
This file was deleted.

tests/integration/archive/msi/__output__/7z2501.msi_extract/_7z.dll_extract/0-1074752.unknown

Lines changed: 0 additions & 3 deletions
This file was deleted.

tests/integration/archive/msi/__output__/7z2501.msi_extract/_7z.dll_extract/1074752-1133791.rar

Lines changed: 0 additions & 3 deletions
This file was deleted.

tests/integration/archive/msi/__output__/7z2501.msi_extract/_7z.dll_extract/1133791-1316352.unknown

Lines changed: 0 additions & 3 deletions
This file was deleted.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:38d49f8fe09b1c332b01d0940e57b7258f4447733643273a01c59959ad9d3b0a
3+
size 1564991

0 commit comments

Comments
 (0)