Skip to content

Commit d2d06a0

Browse files
László Vaskó authored and
qkaiser committed
hyperscan: replacing wrapper
1 parent 414bb7a commit d2d06a0

File tree

9 files changed

+88
-160
lines changed

9 files changed

+88
-160
lines changed

.github/actions/setup-dependencies/action.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,13 @@
11
inputs:
22
python-version:
3-
description: 'Python version to setup'
3+
description: "Python version to setup"
44
required: false
55
default: 3.8
66
runs:
77
using: "composite"
88
steps:
99
- name: Install 3rd party from apt
10-
run: sudo apt install e2fsprogs p7zip-full unar zlib1g-dev liblzo2-dev lz4 lzop lziprecover img2simg libhyperscan5 libhyperscan-dev zstd
10+
run: sudo apt install e2fsprogs p7zip-full unar zlib1g-dev liblzo2-dev lz4 lzop lziprecover img2simg zstd
1111
shell: bash
1212

1313
- name: Install sasquatch

Dockerfile

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,6 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
2121
xz-utils \
2222
zlib1g-dev \
2323
libmagic1 \
24-
libhyperscan5 \
25-
libhyperscan-dev \
2624
zstd
2725
RUN curl -L -o sasquatch_1.0_amd64.deb https://github.com/onekey-sec/sasquatch/releases/download/sasquatch-v1.0/sasquatch_1.0_amd64.deb \
2826
&& dpkg -i sasquatch_1.0_amd64.deb \

default.nix

Lines changed: 0 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,6 @@
1616
, simg2img
1717
, unar
1818
, file
19-
, hyperscan
2019
, zstd
2120
}:
2221

@@ -72,21 +71,6 @@ let
7271
'';
7372
});
7473

75-
hyperscan = super.hyperscan.overridePythonAttrs (_: {
76-
buildInputs = [
77-
hyperscan
78-
self.poetry
79-
self.setuptools
80-
];
81-
nativeBuildInputs = [
82-
pkg-config
83-
];
84-
85-
installPhase = ''
86-
${self.python.pythonForBuild.interpreter} -m pip install --no-build-isolation --no-index --prefix=$out --ignore-installed --no-dependencies --no-cache .
87-
'';
88-
});
89-
9074
arpy = overrideWithSetuptools super.arpy { };
9175
yaffshiv = overrideWithSetuptools super.yaffshiv { };
9276
ubi-reader = overrideWithSetuptools super.ubi-reader { };

poetry.lock

Lines changed: 17 additions & 16 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ yaffshiv = { git = "https://github.com/onekey-sec/yaffshiv.git", rev = "a8f21283
2626
plotext = "^4.1.5"
2727
pluggy = "^1.0.0"
2828
python-magic = "^0.4.27"
29-
hyperscan = "0.2.0"
29+
pyperscan = "^0.1.0"
3030
lark = "^1.1.2"
3131
lz4 = "^4.0.0"
3232
lief = "^0.12.3"

tests/test_finder.py

Lines changed: 10 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
import attr
22
import pytest
3+
from pyperscan import Scan
34

45
from unblob.file_utils import InvalidInputFormat
56
from unblob.finder import build_hyperscan_database, search_chunks
@@ -58,33 +59,26 @@ def calculate_chunk(self, file, start_offset: int):
5859

5960

6061
def test_build_hyperscan_database():
61-
db, handler_map = build_hyperscan_database((TestHandlerA, TestHandlerB))
62+
db = build_hyperscan_database((TestHandlerA, TestHandlerB))
6263
matches = []
63-
db.scan(
64-
[bytearray(b"A123456789BB")],
65-
match_event_handler=lambda pattern_id, start, end, flags, m: m.append(
66-
(pattern_id, start, end)
67-
),
68-
context=matches,
69-
)
7064

71-
assert len(handler_map) == 3
65+
def on_match(m, pattern_id, start, end):
66+
m.append((pattern_id, start, end))
67+
return Scan.Continue
68+
69+
db.build(matches, on_match).scan(b"A123456789BB")
7270

7371
assert len(matches) == 2
74-
assert isinstance(handler_map[matches[0][0]], TestHandlerA)
75-
assert isinstance(handler_map[matches[1][0]], TestHandlerB)
7672
assert matches[0][1] == 0
7773
assert matches[1][1] == 10
7874

7975

8076
def test_db_and_handler_map_instances_are_cached():
81-
db1, handler_map1 = build_hyperscan_database((TestHandlerA, TestHandlerB))
82-
db2, handler_map2 = build_hyperscan_database((TestHandlerA, TestHandlerB))
83-
db3, handler_map3 = build_hyperscan_database((TestHandlerA,))
77+
db1 = build_hyperscan_database((TestHandlerA, TestHandlerB))
78+
db2 = build_hyperscan_database((TestHandlerA, TestHandlerB))
79+
db3 = build_hyperscan_database((TestHandlerA,))
8480
assert db1 is db2
85-
assert handler_map1 is handler_map2
8681
assert db1 is not db3
87-
assert handler_map1 is not handler_map3
8882

8983

9084
def test_invalid_hexstring_pattern_raises():

unblob/finder.py

Lines changed: 25 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -2,12 +2,11 @@
22
Searching Chunk related functions.
33
The main "entry point" is search_chunks_by_priority.
44
"""
5-
from enum import Flag
65
from functools import lru_cache
7-
from typing import Dict, List, Optional, Tuple
6+
from typing import List, Optional
87

98
import attr
10-
import hyperscan
9+
from pyperscan import BlockDatabase, Flag, Pattern, Scan
1110
from structlog import get_logger
1211

1312
from .file_utils import InvalidInputFormat, SeekError
@@ -21,18 +20,12 @@
2120

2221
@attr.define
2322
class HyperscanMatchContext:
24-
handler_map: Dict[int, Handler]
2523
file: File
2624
file_size: int
2725
all_chunks: List
2826
task_result: TaskResult
2927

3028

31-
class _HyperscanScan(Flag):
32-
Continue = False
33-
Terminate = True
34-
35-
3629
def _calculate_chunk(
3730
handler: Handler, file: File, real_offset, task_result: TaskResult
3831
) -> Optional[ValidChunk]:
@@ -74,13 +67,12 @@ def _calculate_chunk(
7467

7568

7669
def _hyperscan_match(
77-
pattern_id: int, offset: int, end: int, flags: int, context: HyperscanMatchContext
78-
) -> _HyperscanScan:
79-
handler = context.handler_map[pattern_id]
70+
context: HyperscanMatchContext, handler: Handler, offset: int, end: int
71+
) -> Scan:
8072
real_offset = offset + handler.PATTERN_MATCH_OFFSET
8173

8274
if real_offset < 0:
83-
return _HyperscanScan.Continue
75+
return Scan.Continue
8476

8577
# Skip chunk calculation if this would start inside another one,
8678
# similar to remove_inner_chunks, but before we even begin calculating.
@@ -91,7 +83,7 @@ def _hyperscan_match(
9183
offset=real_offset,
9284
_verbosity=2,
9385
)
94-
return _HyperscanScan.Continue
86+
return Scan.Continue
9587

9688
logger.debug(
9789
"Calculating chunk for pattern match",
@@ -104,11 +96,11 @@ def _hyperscan_match(
10496

10597
# We found some random bytes this handler couldn't parse
10698
if chunk is None:
107-
return _HyperscanScan.Continue
99+
return Scan.Continue
108100

109101
if chunk.end_offset > context.file_size:
110102
logger.debug("Chunk overflows file", chunk=chunk, _verbosity=2)
111-
return _HyperscanScan.Continue
103+
return Scan.Continue
112104

113105
chunk.handler = handler
114106
logger.debug("Found valid chunk", chunk=chunk, handler=handler.NAME, _verbosity=2)
@@ -117,9 +109,9 @@ def _hyperscan_match(
117109
# Terminate scan if we match till the end of the file
118110
if chunk.end_offset == context.file_size:
119111
logger.debug("Chunk covers till end of the file", chunk=chunk)
120-
return _HyperscanScan.Terminate
112+
return Scan.Terminate
121113

122-
return _HyperscanScan.Continue
114+
return Scan.Continue
123115

124116

125117
def search_chunks( # noqa: C901
@@ -135,33 +127,28 @@ def search_chunks( # noqa: C901
135127
"""
136128
all_chunks = []
137129

138-
hyperscan_db, handler_map = build_hyperscan_database(handlers)
130+
hyperscan_db = build_hyperscan_database(handlers)
139131

140132
hyperscan_context = HyperscanMatchContext(
141-
handler_map=handler_map,
142133
file=file,
143134
file_size=file_size,
144135
all_chunks=all_chunks,
145136
task_result=task_result,
146137
)
147138

139+
scanner = hyperscan_db.build(hyperscan_context, _hyperscan_match)
140+
148141
try:
149-
hyperscan_db.scan(
150-
[file],
151-
match_event_handler=_hyperscan_match,
152-
context=hyperscan_context,
153-
)
154-
except hyperscan.error as e:
155-
if e.args and e.args[0] == f"error code {hyperscan.HS_SCAN_TERMINATED}":
142+
if scanner.scan(file) == Scan.Terminate:
156143
logger.debug(
157144
"Scanning terminated as chunk matches till end of file",
158145
)
159146
return all_chunks
160-
else:
161-
logger.error(
162-
"Error scanning for patterns",
163-
error=e,
164-
)
147+
except Exception as e:
148+
logger.error(
149+
"Error scanning for patterns",
150+
error=e,
151+
)
165152

166153
logger.debug(
167154
"Ended searching for chunks",
@@ -172,21 +159,18 @@ def search_chunks( # noqa: C901
172159

173160

174161
@lru_cache
175-
def build_hyperscan_database(handlers: Handlers) -> Tuple[hyperscan.Database, Dict]:
176-
db = hyperscan.Database(mode=hyperscan.HS_MODE_VECTORED)
177-
handler_map = dict()
178-
179-
pattern_id = 0
162+
def build_hyperscan_database(handlers: Handlers):
180163
patterns = []
181164
for handler_class in handlers:
182165
handler = handler_class()
183166
for pattern in handler.PATTERNS:
184167
try:
185168
patterns.append(
186-
(
169+
Pattern(
187170
pattern.as_regex(),
188-
pattern_id,
189-
hyperscan.HS_FLAG_SOM_LEFTMOST | hyperscan.HS_FLAG_DOTALL,
171+
Flag.SOM_LEFTMOST,
172+
Flag.DOTALL,
173+
tag=handler,
190174
)
191175
)
192176
except InvalidHexString as e:
@@ -197,10 +181,4 @@ def build_hyperscan_database(handlers: Handlers) -> Tuple[hyperscan.Database, Di
197181
error=str(e),
198182
)
199183
raise
200-
handler_map[pattern_id] = handler
201-
pattern_id += 1
202-
203-
expressions, ids, flags = zip(*patterns)
204-
db.compile(expressions=expressions, ids=ids, elements=len(patterns), flags=flags)
205-
206-
return db, handler_map
184+
return BlockDatabase(*patterns)

0 commit comments

Comments
 (0)