Skip to content

Commit 6b898f7

Browse files
committed
[skip actions] [hsqs] 2025-11-17T15:45:59+02:00
1 parent 8832adf commit 6b898f7

File tree

14 files changed

+315
-21
lines changed

14 files changed

+315
-21
lines changed

.mypy.ini

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,6 @@ ignore_missing_imports = True
5858

5959
[mypy-striprtf.*]
6060
ignore_missing_imports = True
61+
62+
[mypy-PySquashfsImage.*]
63+
ignore_missing_imports = True

credsweeper/deep_scanner/deep_scanner.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from .rpm_scanner import RpmScanner
2626
from .rtf_scanner import RtfScanner
2727
from .sqlite3_scanner import Sqlite3Scanner
28+
from .squashfs_scanner import SquashfsScanner
2829
from .strings_scanner import StringsScanner
2930
from .tar_scanner import TarScanner
3031
from .tmx_scanner import TmxScanner
@@ -54,6 +55,7 @@ class DeepScanner(
5455
PptxScanner, #
5556
RtfScanner, #
5657
RpmScanner, #
58+
SquashfsScanner, #
5759
Sqlite3Scanner, #
5860
StringsScanner, #
5961
TarScanner, #
@@ -136,6 +138,9 @@ def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[
136138
elif Util.is_sqlite3(data):
137139
if 0 < depth:
138140
deep_scanners.append(Sqlite3Scanner)
141+
elif Util.is_squashfs(data):
142+
if 0 < depth:
143+
deep_scanners.append(SquashfsScanner)
139144
elif Util.is_asn1(data):
140145
deep_scanners.append(PkcsScanner)
141146
elif Util.is_rtf(data):
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import logging
2+
from abc import ABC
3+
from typing import List, Optional
4+
5+
from PySquashfsImage import SquashFsImage
6+
7+
from credsweeper.credentials.candidate import Candidate
8+
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
9+
from credsweeper.file_handler.data_content_provider import DataContentProvider
10+
from credsweeper.file_handler.file_path_extractor import FilePathExtractor
11+
from credsweeper.utils.util import Util
12+
13+
logger = logging.getLogger(__name__)
14+
15+
16+
class SquashfsScanner(AbstractScanner, ABC):
    """Implements SquashFS image scanning"""

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Extracts regular files one by one from a SquashFS image and launches recursive_scan for each.

        Args:
            data_provider: provider whose ``data`` holds the raw bytes of the image
            depth: recursion depth value, forwarded unchanged to ``recursive_scan``
            recursive_limit_size: maximal size in bytes accepted for a single file of the image

        Return:
            List of candidates collected from all scanned files (may be empty),
            or None when any exception was raised while the image was processed

        """
        try:
            candidates = []
            with SquashFsImage.from_bytes(data_provider.data) as image:
                for i in image:
                    # only regular files are scanned: directories, symlinks and other entries are skipped
                    if not i.is_file or i.is_symlink:
                        continue
                    if FilePathExtractor.check_exclude_file(self.config, i.path):
                        continue
                    # the size declared in the image is checked first to avoid reading oversized files
                    if 0 > recursive_limit_size - i.size:
                        logger.error(f"{i.name}: size {i.size}"
                                     f" is over limit {recursive_limit_size} depth:{depth}")
                        continue
                    # per-file trace is diagnostic only, so it must not be reported as a warning
                    logger.debug(f"{i.path} {i.name}")
                    hsqs_content_provider = DataContentProvider(data=image.read_file(i.inode),
                                                                file_path=i.path,
                                                                file_type=Util.get_extension(i.path),
                                                                info=f"{data_provider.info}|HSQS:{i.path}")
                    # Nevertheless, use extracted data size
                    new_limit = recursive_limit_size - len(hsqs_content_provider.data)
                    logger.info(f"{i.name}: size {len(hsqs_content_provider.data)}")
                    hsqs_candidates = self.recursive_scan(hsqs_content_provider, depth, new_limit)
                    candidates.extend(hsqs_candidates)
            return candidates
        except Exception as hsqs_exc:
            # best effort: a broken or unsupported image is reported and yields no result
            logger.error(f"{data_provider.file_path}:{hsqs_exc}")
        return None

credsweeper/secret/config.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@
22
"exclude": {
33
"pattern": [],
44
"containers": [
5+
".pak",
56
".aar",
67
".apk",
78
".bz2",
89
".class",
910
".gz",
1011
".jar",
12+
".img",
1113
".lzma",
1214
".rpm",
1315
".tar",
@@ -44,7 +46,6 @@
4446
".gif",
4547
".gmo",
4648
".ico",
47-
".img",
4849
".info",
4950
".jpeg",
5051
".jpg",
@@ -65,7 +66,6 @@
6566
".ogg",
6667
".ogv",
6768
".ops",
68-
".pak",
6969
".png",
7070
".psd",
7171
".pyc",

credsweeper/utils/util.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,16 @@ def is_rtf(data: Union[bytes, bytearray]):
364364
return True
365365
return False
366366

367+
@staticmethod
368+
def is_squashfs(data: Union[bytes, bytearray]) -> bool:
    """According https://en.wikipedia.org/wiki/List_of_file_signatures - SquashFS file system (little-endian)

    Besides the "hsqs" magic, two superblock fields are verified to reduce false positives:
    bytes 28..31 must be 04 00 00 00 (version 4.0 in the SquashFS superblock layout)
    and the little-endian block size at bytes 12..15 must be a valid value.

    Args:
        data: raw bytes of the inspected content; any other type is rejected

    Return:
        True when the data looks like a SquashFS 4.0 image, otherwise False

    """
    if not isinstance(data, (bytes, bytearray)):
        return False
    if not data.startswith(b"hsqs") or b"\x04\x00\x00\x00" != data[28:32]:
        return False
    # "Must be a power of two between 4096 (4k) and 1048576 (1 MiB)"
    block_size = int.from_bytes(data[12:16], byteorder="little", signed=False)
    # a power of two has a single bit set, so clearing its lowest set bit must give zero;
    # a plain alignment test would also accept multiples of 4096 such as 12288
    return 4096 <= block_size <= 1048576 and 0 == block_size & (block_size - 1)
376+
367377
@staticmethod
368378
def is_asn1(data: Union[bytes, bytearray]) -> int:
369379
"""Only sequence type 0x30 and size correctness are checked

experiment/hyperparameters.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
HP_DICT = {
2-
"line_lstm_dropout_rate": ((0.3, 0.5, 0.01), 0.4),
3-
"line_lstm_recurrent_dropout_rate": ((0.0, 0.4, 0.01), 0.1),
4-
"variable_lstm_dropout_rate": ((0.3, 0.5, 0.01), 0.4),
5-
"variable_lstm_recurrent_dropout_rate": ((0.0, 0.4, 0.01), 0.1),
6-
"value_lstm_dropout_rate": ((0.3, 0.5, 0.01), 0.4),
7-
"value_lstm_recurrent_dropout_rate": ((0.0, 0.4, 0.01), 0.1),
8-
"dense_a_lstm_dropout_rate": ((0.1, 0.5, 0.01), 0.2),
9-
"dense_b_lstm_dropout_rate": ((0.1, 0.5, 0.01), 0.2),
2+
"line_lstm_dropout_rate": ((0.4, 0.5, 0.01), 0.4),
3+
"line_lstm_recurrent_dropout_rate": ((0.0, 0.3, 0.01), 0.1),
4+
"variable_lstm_dropout_rate": ((0.4, 0.5, 0.01), 0.4),
5+
"variable_lstm_recurrent_dropout_rate": ((0.0, 0.3, 0.01), 0.1),
6+
"value_lstm_dropout_rate": ((0.4, 0.5, 0.01), 0.4),
7+
"value_lstm_recurrent_dropout_rate": ((0.0, 0.3, 0.01), 0.1),
8+
"dense_a_drop": ((0.0, 0.3, 0.01), 0.2),
9+
"dense_b_drop": ((0.0, 0.3, 0.01), 0.2),
1010
}

experiment/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ def main(argv) -> int:
6262
default=False)
6363
args = parser.parse_args(argv[1:])
6464

65-
fixed_seed = 20250919
65+
fixed_seed = 20251111
6666
print(f"Fixed seed:{fixed_seed}", flush=True)
6767
random.seed(fixed_seed)
6868

experiment/ml_model.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ def build(self, hp: Optional[Any]) -> Model:
4343
variable_lstm_recurrent_dropout_rate = self.get_hyperparam("variable_lstm_recurrent_dropout_rate", hp)
4444
value_lstm_dropout_rate = self.get_hyperparam("value_lstm_dropout_rate", hp)
4545
value_lstm_recurrent_dropout_rate = self.get_hyperparam("value_lstm_recurrent_dropout_rate", hp)
46-
dense_a_dropout_rate = self.get_hyperparam("dense_a_lstm_dropout_rate", hp)
47-
dense_b_dropout_rate = self.get_hyperparam("dense_b_lstm_dropout_rate", hp)
46+
dense_a_drop = self.get_hyperparam("dense_a_drop", hp)
47+
dense_b_drop = self.get_hyperparam("dense_b_drop", hp)
4848

4949
line_input = Input(shape=(None, self.line_shape[2]), name="line_input", dtype=self.d_type)
5050
line_lstm = LSTM(units=self.line_shape[1],
@@ -80,13 +80,13 @@ def build(self, hp: Optional[Any]) -> Model:
8080

8181
# first hidden layer
8282
dense_a = Dense(units=dense_units, activation=ReLU(), name="a_dense", dtype=self.d_type)(joined_features)
83-
dropout_dense_a = Dropout(dense_a_dropout_rate, name="a_dropout")(dense_a)
83+
drop_a = Dropout(name="a_drop", rate=dense_a_drop)(dense_a)
8484

8585
# second hidden layer
86-
dense_b = Dense(units=dense_units, activation=ReLU(), name="b_dense", dtype=self.d_type)(dropout_dense_a)
87-
dropout_dense_b = Dropout(dense_b_dropout_rate, name="b_dropout")(dense_b)
86+
dense_b = Dense(units=dense_units, activation=ReLU(), name="b_dense", dtype=self.d_type)(drop_a)
87+
drop_b = Dropout(name="b_drop", rate=dense_b_drop)(dense_b)
8888

89-
dense_final = Dense(units=1, activation='sigmoid', name="prediction", dtype=self.d_type)(dropout_dense_b)
89+
dense_final = Dense(units=1, activation='sigmoid', name="prediction", dtype=self.d_type)(drop_b)
9090

9191
metrics = [BinaryAccuracy(name="binary_accuracy"), Precision(name="precision"), Recall(name="recall")]
9292

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ dependencies = [
2424
"python-dateutil",
2525
"python-docx",
2626
"python-pptx",
27+
"PySquashfsImage",
2728
"PyYAML",
2829
"rpmfile",
2930
"striprtf",

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ numpy==2.3.3; python_version > '3.10'
2222
odfpy==1.4.1
2323
xlrd==2.0.2
2424
striprtf==0.0.29
25+
PySquashfsImage==0.9.0
2526

2627
# onnxruntime - ML engine
2728
onnxruntime==1.23.2

0 commit comments

Comments
 (0)