Skip to content

Commit b1ece68

Browse files
committed
[skip actions] [hsqs] 2025-10-05T10:19:30+03:00
1 parent 69d362a commit b1ece68

File tree

9 files changed

+76
-7
lines changed

9 files changed

+76
-7
lines changed

credsweeper/deep_scanner/deep_scanner.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from .pptx_scanner import PptxScanner
2424
from .rpm_scanner import RpmScanner
2525
from .sqlite3_scanner import Sqlite3Scanner
26+
from .squashfs_scanner import SquashfsScanner
2627
from .strings_scanner import StringsScanner
2728
from .tar_scanner import TarScanner
2829
from .tmx_scanner import TmxScanner
@@ -49,6 +50,7 @@ class DeepScanner(
4950
PdfScanner, #
5051
PkcsScanner, #
5152
PptxScanner, #
53+
SquashfsScanner, #
5254
RpmScanner, #
5355
Sqlite3Scanner, #
5456
StringsScanner, #
@@ -132,6 +134,9 @@ def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[
132134
elif Util.is_sqlite3(data):
133135
if 0 < depth:
134136
deep_scanners.append(Sqlite3Scanner)
137+
elif Util.is_squashfs(data):
138+
if 0 < depth:
139+
deep_scanners.append(SquashfsScanner)
135140
elif Util.is_asn1(data):
136141
deep_scanners.append(PkcsScanner)
137142
elif Util.is_xml(data):
credsweeper/deep_scanner/squashfs_scanner.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import logging
2+
from abc import ABC
3+
from typing import List, Optional
4+
5+
from PySquashfsImage import SquashFsImage
6+
7+
from credsweeper.credentials.candidate import Candidate
8+
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
9+
from credsweeper.file_handler.data_content_provider import DataContentProvider
10+
from credsweeper.file_handler.file_path_extractor import FilePathExtractor
11+
from credsweeper.utils.util import Util
12+
13+
logger = logging.getLogger(__name__)
14+
15+
16+
class SquashfsScanner(AbstractScanner, ABC):
    """Implements SquashFS image scanning"""

    def data_scan(
            self,  #
            data_provider: DataContentProvider,  #
            depth: int,  #
            recursive_limit_size: int) -> Optional[List[Candidate]]:
        """Extracts regular files one by one from a SquashFS image and launches recursive_scan on each.

        Args:
            data_provider: provider whose ``data`` holds the raw SquashFS image
            depth: recursion depth passed through to ``recursive_scan``
            recursive_limit_size: size budget; entries larger than it are skipped

        Return:
            List of candidates collected from all scanned entries,
            or None when the image cannot be processed (the error is logged).

        """
        try:
            candidates = []
            with SquashFsImage.from_bytes(data_provider.data) as image:
                for entry in image:
                    # only regular files carry content: skip directories, symlinks and other entry kinds
                    if not entry.is_file or entry.is_symlink:
                        continue
                    if FilePathExtractor.check_exclude_file(self.config, entry.path):
                        continue
                    # pre-check with the declared size to avoid extracting oversized entries
                    if 0 > recursive_limit_size - entry.size:
                        logger.error(f"{entry.name}: size {entry.size}"
                                     f" is over limit {recursive_limit_size} depth:{depth}")
                        continue
                    # per-entry trace is diagnostic only, so it must not pollute the warning level
                    logger.debug(f"{entry.path} {entry.name}")
                    hsqs_content_provider = DataContentProvider(data=image.read_file(entry.inode),
                                                                file_path=entry.path,
                                                                file_type=Util.get_extension(entry.path),
                                                                info=f"{data_provider.info}|HSQS:{entry.path}")
                    # Nevertheless, use extracted data size
                    new_limit = recursive_limit_size - len(hsqs_content_provider.data)
                    logger.info(f"{entry.name}: size {len(hsqs_content_provider.data)}")
                    hsqs_candidates = self.recursive_scan(hsqs_content_provider, depth, new_limit)
                    candidates.extend(hsqs_candidates)
            return candidates
        except Exception as hsqs_exc:
            # best effort: a broken or unsupported image must not abort the whole scan
            logger.error(f"{data_provider.file_path}:{hsqs_exc}")
        return None

credsweeper/secret/config.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,14 @@
22
"exclude": {
33
"pattern": [],
44
"containers": [
5+
".pak",
56
".aar",
67
".apk",
78
".bz2",
89
".class",
910
".gz",
1011
".jar",
12+
".img",
1113
".lzma",
1214
".rpm",
1315
".tar",
@@ -41,7 +43,6 @@
4143
".gif",
4244
".gmo",
4345
".ico",
44-
".img",
4546
".info",
4647
".jpeg",
4748
".jpg",
@@ -62,7 +63,6 @@
6263
".ogg",
6364
".ogv",
6465
".ops",
65-
".pak",
6666
".png",
6767
".psd",
6868
".pyc",

credsweeper/utils/util.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -350,13 +350,23 @@ def is_lzma(data: Union[bytes, bytearray]) -> bool:
350350
return True
351351
return False
352352

353-
@classmethod
354-
def is_sqlite3(cls, data):
353+
@staticmethod
def is_sqlite3(data):
    """Tell whether data begins with the SQLite 3 database header.

    Signature per https://en.wikipedia.org/wiki/List_of_file_signatures - SQLite Database
    """
    # anything that is not a bytes-like buffer cannot carry the signature
    if not isinstance(data, (bytes, bytearray)):
        return False
    return data.startswith(b"SQLite format 3\0")
359359

360+
@staticmethod
def is_squashfs(data) -> bool:
    """Checks whether data looks like a little-endian SquashFS 4.0 image.

    The superblock must start with the magic "hsqs", declare version 4.0
    (two little-endian 16-bit fields at offset 28) and hold a valid block size
    (little-endian 32-bit field at offset 12).

    Args:
        data: candidate buffer; anything but bytes or bytearray yields False

    Return:
        True when the superblock fields are consistent, otherwise False

    """
    if isinstance(data, (bytes, bytearray)) and data.startswith(b"hsqs") and b"\x04\x00\x00\x00" == data[28:32]:
        # "Must be a power of two between 4096 (4k) and 1048576 (1 MiB)"
        block_size = int.from_bytes(data[12:16], byteorder="little", signed=False)
        # a power of two has exactly one bit set; a plain multiple of 4096 (e.g. 12288) is not enough
        if 4096 <= block_size <= 1048576 and 0 == block_size & (block_size - 1):
            return True
    return False
369+
360370
@staticmethod
361371
def is_asn1(data: Union[bytes, bytearray]) -> int:
362372
"""Only sequence type 0x30 and size correctness are checked

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ dependencies = [
2626
"python-pptx",
2727
"PyYAML",
2828
"rpmfile",
29+
"PySquashfsImage",
2930
"whatthepatch",
3031
"xlrd",
3132
]

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ numpy==2.2.6; python_version == '3.10'
2121
numpy==2.3.3; python_version > '3.10'
2222
odfpy==1.4.1
2323
xlrd==2.0.2
24+
PySquashfsImage==0.9.0
2425

2526
# onnxruntime - ML engine
2627
onnxruntime==1.23.0

tests/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from pathlib import Path
22

33
# total number of files in test samples
4-
SAMPLES_FILES_COUNT = 167
4+
SAMPLES_FILES_COUNT = 168
55

66
# the lowest value of ML threshold is used to display possible lowest values
77
NEGLIGIBLE_ML_THRESHOLD = 0.0001
@@ -19,7 +19,7 @@
1919
SAMPLES_POST_CRED_COUNT = 439
2020

2121
# archived credentials that are not found without --depth
22-
SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 120
22+
SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 125
2323
SAMPLES_IN_DEEP_2 = SAMPLES_IN_DEEP_1 + 3
2424
SAMPLES_IN_DEEP_3 = SAMPLES_IN_DEEP_2 + 4
2525

tests/samples/sample.hsqs

4 KB
Binary file not shown.

tests/test_app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -493,7 +493,7 @@ def test_depth_p(self) -> None:
493493
cvs_checksum = hashlib.md5(f.read()).digest()
494494
checksum = bytes(a ^ b for a, b in zip(checksum, cvs_checksum))
495495
# update the checksum manually and keep line endings in the samples as is (git config core.autocrlf false)
496-
self.assertEqual("0399a96ebab6339cac1c986dde578a27", binascii.hexlify(checksum).decode())
496+
self.assertEqual("418534e183a0820bc3d6830fc29ef46a", binascii.hexlify(checksum).decode())
497497
normal_report = []
498498
sorted_report = []
499499
with tempfile.TemporaryDirectory() as tmp_dir:

0 commit comments

Comments
 (0)