Skip to content

Commit e9dc3d1

Browse files
authored
fix StringsScanner hallucinations. Version UP 1.14.1 (#799)
* fix StringsScanner hallucinations
* fix
1 parent 59c5995 commit e9dc3d1

File tree

5 files changed

+44
-28
lines changed

5 files changed

+44
-28
lines changed

.github/workflows/check.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ jobs:
9292
run: |
9393
banner="$(python -m credsweeper --banner | head -1)"
9494
echo "banner = '${banner}'"
95-
if [ "CredSweeper 1.14.0 crc32:a52f81a3" != "${banner}" ]; then
95+
if [ "CredSweeper 1.14.1 crc32:5aa3f35d" != "${banner}" ]; then
9696
echo "Update the check for '${banner}'"
9797
exit 1
9898
fi

credsweeper/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,4 @@
2424
"__version__"
2525
]
2626

27-
__version__ = "1.14.0"
27+
__version__ = "1.14.1"

credsweeper/deep_scanner/strings_scanner.py

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,25 +15,27 @@ class StringsScanner(AbstractScanner, ABC):
1515
"""Implements known binary file scanning with ASCII strings representations"""
1616

1717
@staticmethod
18-
def get_strings(data: bytes) -> List[Tuple[str, int]]:
18+
def get_enumerated_lines(data: bytes) -> List[Tuple[int, str]]:
1919
"""Processes binary to found ASCII strings. Use offset instead line number."""
20-
strings = []
21-
offset = 0
22-
line = ''
20+
enumerated_lines = []
21+
offset = -1
22+
line_items = []
2323
for n, x in enumerate(data):
2424
if 0x09 == x or 0x20 <= x <= 0x7E:
2525
# TAB, SPACE and visible ASCII symbols
26-
if not offset:
27-
# for line number
26+
if 0 > offset:
27+
# use start of string as line number
2828
offset = n
29-
line += chr(x)
30-
elif MIN_DATA_LEN <= len(line):
31-
strings.append((line, offset))
32-
offset = 0
33-
line = ''
34-
if MIN_DATA_LEN <= len(line):
35-
strings.append((line, offset))
36-
return strings
29+
line_items.append(chr(x))
30+
continue
31+
if MIN_DATA_LEN <= len(line_items):
32+
# add valuable lines only
33+
enumerated_lines.append((offset, ''.join(line_items)))
34+
offset = -1
35+
line_items.clear()
36+
if MIN_DATA_LEN <= len(line_items):
37+
enumerated_lines.append((offset, ''.join(line_items)))
38+
return enumerated_lines
3739

3840
def data_scan(
3941
self, #
@@ -42,9 +44,9 @@ def data_scan(
4244
recursive_limit_size: int) -> Optional[List[Candidate]]:
4345
"""Extracts data file from .ar (debian) archive and launches data_scan"""
4446

45-
if strings := StringsScanner.get_strings(data_provider.data):
46-
string_data_provider = StringContentProvider(lines=[x[0] for x in strings],
47-
line_numbers=[x[1] for x in strings],
47+
if strings := StringsScanner.get_enumerated_lines(data_provider.data):
48+
string_data_provider = StringContentProvider(lines=[x[1] for x in strings],
49+
line_numbers=[x[0] for x in strings],
4850
file_path=data_provider.file_path,
4951
file_type=data_provider.file_type,
5052
info=f"{data_provider.info}|STRINGS")

tests/data/depth_3_pedantic.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12798,16 +12798,16 @@
1279812798
"ml_probability": 1.0,
1279912799
"line_data_list": [
1280012800
{
12801-
"line": "0ui\tBJ=https://jrfdeg:[email protected]:32768/architecture",
12802-
"line_num": 1556,
12801+
"line": "https://jrfdeg:[email protected]:32768/architecture",
12802+
"line_num": 1640,
1280312803
"path": "./tests/samples/sample.elf",
1280412804
"info": "FILE:./tests/samples/sample.elf|STRINGS",
1280512805
"variable": "https://",
12806-
"variable_start": 7,
12807-
"variable_end": 15,
12806+
"variable_start": 0,
12807+
"variable_end": 8,
1280812808
"value": "dh3sjr8b",
12809-
"value_start": 22,
12810-
"value_end": 30,
12809+
"value_start": 15,
12810+
"value_end": 23,
1281112811
"entropy": 3.0
1281212812
}
1281312813
]

tests/deep_scanner/test_strings_scanner.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,25 @@
55
from credsweeper.deep_scanner.strings_scanner import StringsScanner
66

77

8-
class TestDebScanner(unittest.TestCase):
8+
class TestStringsScanner(unittest.TestCase):
99

1010
def setUp(self):
1111
self.maxDiff = None
1212

1313
@given(strategies.binary())
14-
def test_get_shannon_entropy_hypothesis_n(self, data):
15-
self.assertIsNotNone(StringsScanner.get_strings(data))
14+
def test_get_lines_hypothesis_n(self, data):
15+
self.assertIsNotNone(StringsScanner.get_enumerated_lines(data))
16+
17+
def test_get_lines_n(self):
18+
self.assertListEqual([], StringsScanner.get_enumerated_lines(b''))
19+
self.assertListEqual([], StringsScanner.get_enumerated_lines(b'\x00\xBE'))
20+
self.assertListEqual([], StringsScanner.get_enumerated_lines(b'\x9F\xBEP\xE3\xb4W\xA5:\xF1R\x9C00\xcf\x84t!'))
21+
self.assertListEqual([], StringsScanner.get_enumerated_lines(b'\x00\x01\x02PW:R00t\x0D\x00'))
22+
23+
def test_get_lines_p(self):
24+
self.assertListEqual([(3, "PW:R00t!")], StringsScanner.get_enumerated_lines(b'\x00\x01\x02PW:R00t!\x0D\x00'))
25+
self.assertListEqual([(0, "PW:R00t!")], StringsScanner.get_enumerated_lines(b'PW:R00t!\x0D\x00'))
26+
self.assertListEqual([(4, "PW:R00t!")], StringsScanner.get_enumerated_lines(b'\x00\x01\x02\x03PW:R00t!'))
27+
self.assertListEqual(
28+
[(9, 'Salt:CwXD\t3dsd'), (24, 'Token:SOMETEST')],
29+
StringsScanner.get_enumerated_lines(b'\x9F\xBEP\xE3\xb4W\xA5:\xFFSalt:CwXD\x093dsd\nToken:SOMETEST\0'))

0 commit comments

Comments
 (0)