Skip to content

Commit 59e37e9

Browse files
authored
Bugfix and version up to 1.14.3 (#804)
* bugfix * banner fix * refactoring * optimization * refactoring import * import optimization * ValueNotPartEncodedCheck added for facebook token * fix missed value and style * refuzzing
1 parent 2132141 commit 59e37e9

File tree

131 files changed

+1547
-1002
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

131 files changed

+1547
-1002
lines changed

.github/workflows/check.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ jobs:
9292
run: |
9393
banner="$(python -m credsweeper --banner | head -1)"
9494
echo "banner = '${banner}'"
95-
if [ "CredSweeper 1.14.2 crc32:76bde097" != "${banner}" ]; then
95+
if [ "CredSweeper 1.14.3 crc32:e5cd2d86" != "${banner}" ]; then
9696
echo "Update the check for '${banner}'"
9797
exit 1
9898
fi

credsweeper/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,4 @@
2424
"__version__"
2525
]
2626

27-
__version__ = "1.14.2"
27+
__version__ = "1.14.3"

credsweeper/__main__.py

Lines changed: 1 addition & 497 deletions
Large diffs are not rendered by default.

credsweeper/deep_scanner/byte_scanner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
from typing import List, Optional
44

55
from credsweeper.credentials.candidate import Candidate
6+
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
67
from credsweeper.file_handler.byte_content_provider import ByteContentProvider
78
from credsweeper.file_handler.data_content_provider import DataContentProvider
8-
from .abstract_scanner import AbstractScanner
99

1010
logger = logging.getLogger(__name__)
1111

credsweeper/deep_scanner/bzip2_scanner.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import logging
33
from abc import ABC
44
from pathlib import Path
5-
from typing import List, Optional
5+
from typing import List, Optional, Union
66

77
from credsweeper.credentials.candidate import Candidate
88
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
@@ -15,6 +15,15 @@
1515
class Bzip2Scanner(AbstractScanner, ABC):
1616
"""Implements bzip2 scanning"""
1717

18+
@staticmethod
19+
def match(data: Union[bytes, bytearray]) -> bool:
20+
"""According to https://en.wikipedia.org/wiki/Bzip2"""
21+
if data.startswith(b"\x42\x5A\x68") and 10 <= len(data) \
22+
and 0x31 <= data[3] <= 0x39 \
23+
and 4 == data.find(b"\x31\x41\x59\x26\x53\x59", 4, 10):
24+
return True
25+
return False
26+
1827
def data_scan(
1928
self, #
2029
data_provider: DataContentProvider, #

credsweeper/deep_scanner/deb_scanner.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import logging
22
import struct
33
from abc import ABC
4-
from typing import List, Optional, Generator, Tuple
4+
from typing import List, Optional, Generator, Tuple, Union
55

66
from credsweeper.common.constants import MIN_DATA_LEN, UTF_8
77
from credsweeper.credentials.candidate import Candidate
@@ -17,6 +17,13 @@ class DebScanner(AbstractScanner, ABC):
1717

1818
__header_size = 60
1919

20+
@staticmethod
21+
def match(data: Union[bytes, bytearray]) -> bool:
22+
"""According to https://en.wikipedia.org/wiki/Deb_(file_format)"""
23+
if data.startswith(b"!<arch>\n"):
24+
return True
25+
return False
26+
2027
@staticmethod
2128
def walk_deb(data: bytes) -> Generator[Tuple[int, str, bytes], None, None]:
2229
"""Processes sequence of DEB archive and yields offset, name and data"""

credsweeper/deep_scanner/deep_scanner.py

Lines changed: 51 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -2,37 +2,37 @@
22
from typing import List, Any, Tuple
33

44
from credsweeper.config.config import Config
5+
from credsweeper.deep_scanner.byte_scanner import ByteScanner
6+
from credsweeper.deep_scanner.bzip2_scanner import Bzip2Scanner
7+
from credsweeper.deep_scanner.csv_scanner import CsvScanner
8+
from credsweeper.deep_scanner.deb_scanner import DebScanner
9+
from credsweeper.deep_scanner.docx_scanner import DocxScanner
10+
from credsweeper.deep_scanner.eml_scanner import EmlScanner
11+
from credsweeper.deep_scanner.encoder_scanner import EncoderScanner
12+
from credsweeper.deep_scanner.gzip_scanner import GzipScanner
13+
from credsweeper.deep_scanner.html_scanner import HtmlScanner
14+
from credsweeper.deep_scanner.jclass_scanner import JclassScanner
15+
from credsweeper.deep_scanner.jks_scanner import JksScanner
16+
from credsweeper.deep_scanner.lang_scanner import LangScanner
17+
from credsweeper.deep_scanner.lzma_scanner import LzmaScanner
18+
from credsweeper.deep_scanner.mxfile_scanner import MxfileScanner
19+
from credsweeper.deep_scanner.patch_scanner import PatchScanner
20+
from credsweeper.deep_scanner.pdf_scanner import PdfScanner
21+
from credsweeper.deep_scanner.pkcs_scanner import PkcsScanner
22+
from credsweeper.deep_scanner.png_scanner import PngScanner
23+
from credsweeper.deep_scanner.pptx_scanner import PptxScanner
24+
from credsweeper.deep_scanner.rpm_scanner import RpmScanner
25+
from credsweeper.deep_scanner.rtf_scanner import RtfScanner
26+
from credsweeper.deep_scanner.sqlite3_scanner import Sqlite3Scanner
27+
from credsweeper.deep_scanner.strings_scanner import StringsScanner
28+
from credsweeper.deep_scanner.tar_scanner import TarScanner
29+
from credsweeper.deep_scanner.tmx_scanner import TmxScanner
30+
from credsweeper.deep_scanner.xlsx_scanner import XlsxScanner
31+
from credsweeper.deep_scanner.xml_scanner import XmlScanner
32+
from credsweeper.deep_scanner.zip_scanner import ZipScanner
33+
from credsweeper.file_handler.descriptor import Descriptor
534
from credsweeper.scanner.scanner import Scanner
635
from credsweeper.utils.util import Util
7-
from .byte_scanner import ByteScanner
8-
from .bzip2_scanner import Bzip2Scanner
9-
from .csv_scanner import CsvScanner
10-
from .deb_scanner import DebScanner
11-
from .docx_scanner import DocxScanner
12-
from .eml_scanner import EmlScanner
13-
from .encoder_scanner import EncoderScanner
14-
from .gzip_scanner import GzipScanner
15-
from .html_scanner import HtmlScanner
16-
from .jclass_scanner import JclassScanner
17-
from .jks_scanner import JksScanner
18-
from .lang_scanner import LangScanner
19-
from .lzma_scanner import LzmaScanner
20-
from .mxfile_scanner import MxfileScanner
21-
from .patch_scanner import PatchScanner
22-
from .pdf_scanner import PdfScanner
23-
from .pkcs_scanner import PkcsScanner
24-
from .png_scanner import PngScanner
25-
from .pptx_scanner import PptxScanner
26-
from .rpm_scanner import RpmScanner
27-
from .rtf_scanner import RtfScanner
28-
from .sqlite3_scanner import Sqlite3Scanner
29-
from .strings_scanner import StringsScanner
30-
from .tar_scanner import TarScanner
31-
from .tmx_scanner import TmxScanner
32-
from .xlsx_scanner import XlsxScanner
33-
from .xml_scanner import XmlScanner
34-
from .zip_scanner import ZipScanner
35-
from ..file_handler.descriptor import Descriptor
3636

3737
logger = logging.getLogger(__name__)
3838

@@ -49,6 +49,8 @@ class DeepScanner(
4949
JksScanner, #
5050
LangScanner, #
5151
LzmaScanner, #
52+
MxfileScanner, #
53+
EmlScanner, #
5254
PatchScanner, #
5355
PdfScanner, #
5456
PkcsScanner, #
@@ -89,7 +91,7 @@ def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[
8991
"""Returns possible scan methods for the data, depending on its content, and fallback scanners"""
9092
deep_scanners: List[Any] = []
9193
fallback_scanners: List[Any] = []
92-
if Util.is_zip(data):
94+
if ZipScanner.match(data):
9395
if 0 < depth:
9496
deep_scanners.append(ZipScanner)
9597
# probably, there might be a docx, xlsx and so on.
@@ -106,62 +108,62 @@ def get_deep_scanners(data: bytes, descriptor: Descriptor, depth: int) -> Tuple[
106108
deep_scanners.append(PptxScanner)
107109
else:
108110
fallback_scanners.append(PptxScanner)
109-
elif Util.is_com(data):
111+
elif XlsxScanner.match(data):
110112
if ".xls" == descriptor.extension:
111113
deep_scanners.append(XlsxScanner)
112114
else:
113115
fallback_scanners.append(XlsxScanner)
114-
elif Util.is_bzip2(data):
116+
elif Bzip2Scanner.match(data):
115117
if 0 < depth:
116118
deep_scanners.append(Bzip2Scanner)
117-
elif Util.is_lzma(data):
119+
elif LzmaScanner.match(data):
118120
if 0 < depth:
119121
deep_scanners.append(LzmaScanner)
120-
elif Util.is_tar(data):
122+
elif TarScanner.match(data):
121123
if 0 < depth:
122124
deep_scanners.append(TarScanner)
123-
elif Util.is_deb(data):
125+
elif DebScanner.match(data):
124126
if 0 < depth:
125127
deep_scanners.append(DebScanner)
126-
elif Util.is_gzip(data):
128+
elif GzipScanner.match(data):
127129
if 0 < depth:
128130
deep_scanners.append(GzipScanner)
129-
elif Util.is_pdf(data):
131+
elif PdfScanner.match(data):
130132
deep_scanners.append(PdfScanner)
131-
elif Util.is_png(data):
133+
elif PngScanner.match(data):
132134
deep_scanners.append(PngScanner)
133-
elif Util.is_rpm(data):
135+
elif RpmScanner.match(data):
134136
if 0 < depth:
135137
deep_scanners.append(RpmScanner)
136-
elif Util.is_jclass(data):
138+
elif JclassScanner.match(data):
137139
deep_scanners.append(JclassScanner)
138-
elif Util.is_jks(data):
140+
elif JksScanner.match(data):
139141
deep_scanners.append(JksScanner)
140-
elif Util.is_sqlite3(data):
142+
elif Sqlite3Scanner.match(data):
141143
if 0 < depth:
142144
deep_scanners.append(Sqlite3Scanner)
143-
elif Util.is_asn1(data):
145+
elif PkcsScanner.match(data):
144146
deep_scanners.append(PkcsScanner)
145-
elif Util.is_rtf(data):
147+
elif RtfScanner.match(data):
146148
deep_scanners.append(RtfScanner)
147149
fallback_scanners.append(ByteScanner)
148-
elif Util.is_xml(data):
149-
if Util.is_html(data):
150+
elif XmlScanner.match(data):
151+
if HtmlScanner.match(data):
150152
deep_scanners.append(HtmlScanner)
151153
deep_scanners.append(XmlScanner)
152154
fallback_scanners.append(ByteScanner)
153-
elif Util.is_mxfile(data):
155+
elif MxfileScanner.match(data):
154156
deep_scanners.append(MxfileScanner)
155157
deep_scanners.append(XmlScanner)
156158
fallback_scanners.append(ByteScanner)
157-
elif Util.is_tmx(data):
159+
elif TmxScanner.match(data):
158160
deep_scanners.append(TmxScanner)
159161
fallback_scanners.append(XmlScanner)
160162
fallback_scanners.append(ByteScanner)
161163
else:
162164
deep_scanners.append(XmlScanner)
163165
fallback_scanners.append(ByteScanner)
164-
elif Util.is_eml(data):
166+
elif EmlScanner.match(data):
165167
if descriptor.extension in (".eml", ".mht"):
166168
deep_scanners.append(EmlScanner)
167169
else:

credsweeper/deep_scanner/eml_scanner.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import email
22
import logging
33
from abc import ABC
4-
from typing import List, Optional
4+
from typing import List, Optional, Union
55

66
from credsweeper.credentials.candidate import Candidate
77
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
@@ -15,6 +15,16 @@
1515
class EmlScanner(AbstractScanner, ABC):
1616
"""Implements eml scanning"""
1717

18+
@staticmethod
19+
def match(data: Union[bytes, bytearray]) -> bool:
20+
"""According to https://datatracker.ietf.org/doc/html/rfc822 look up the fields: Date, From, To and Subject (all must be present)"""
21+
if (b"\nDate:" in data or data.startswith(b"Date:")) \
22+
and (b"\nFrom:" in data or data.startswith(b"From:")) \
23+
and (b"\nTo:" in data or data.startswith(b"To:")) \
24+
and (b"\nSubject:" in data or data.startswith(b"Subject:")):
25+
return True
26+
return False
27+
1828
def data_scan(
1929
self, #
2030
data_provider: DataContentProvider, #

credsweeper/deep_scanner/gzip_scanner.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import logging
44
from abc import ABC
55
from pathlib import Path
6-
from typing import List, Optional
6+
from typing import List, Optional, Union
77

88
from credsweeper.credentials.candidate import Candidate
99
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
@@ -16,6 +16,13 @@
1616
class GzipScanner(AbstractScanner, ABC):
1717
"""Realises gzip scanning"""
1818

19+
@staticmethod
20+
def match(data: Union[bytes, bytearray]) -> bool:
21+
"""According to https://www.rfc-editor.org/rfc/rfc1952"""
22+
if isinstance(data, (bytes, bytearray)) and data.startswith(b"\x1F\x8B\x08"):
23+
return True
24+
return False
25+
1926
def data_scan(
2027
self, #
2128
data_provider: DataContentProvider, #

credsweeper/deep_scanner/html_scanner.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import logging
22
from abc import ABC
3-
from typing import List, Optional
3+
from typing import List, Optional, Union
44

5+
from credsweeper.common.constants import MAX_LINE_LENGTH
56
from credsweeper.credentials.candidate import Candidate
67
from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
78
from credsweeper.file_handler.data_content_provider import DataContentProvider
@@ -13,6 +14,19 @@
1314
class HtmlScanner(AbstractScanner, ABC):
1415
"""Implements html scanning if possible"""
1516

17+
@staticmethod
18+
def match(data: Union[bytes, bytearray]) -> bool:
19+
"""Used to detect HTML format. Assumes the data has already passed the XML check (XmlScanner.match(), formerly is_xml())."""
20+
for opening_tag, closing_tag in [(b"<html", b"</html>"), (b"<body", b"</body>"), (b"<table", b"</table>"),
21+
(b"<p>", b"</p>"), (b"<span>", b"</span>"), (b"<div>", b"</div>"),
22+
(b"<li>", b"</li>"), (b"<ol>", b"</ol>"), (b"<ul>", b"</ul>"),
23+
(b"<th>", b"</th>"), (b"<tr>", b"</tr>"), (b"<td>", b"</td>")]:
24+
opening_pos = data.find(opening_tag, 0, MAX_LINE_LENGTH)
25+
if 0 <= opening_pos < data.find(closing_tag, opening_pos):
26+
# opening and closing tags were found - assume the data is HTML
27+
return True
28+
return False
29+
1630
def data_scan(
1731
self, #
1832
data_provider: DataContentProvider, #

0 commit comments

Comments
 (0)