Skip to content

Commit b4b7d4c

Browse files
Improving embedded JS handling (#125)
Co-authored-by: Brandon Murphy <4827852+zoomequipd@users.noreply.github.com>
1 parent 7e3b276 commit b4b7d4c

File tree

3 files changed: +29 -1 lines changed

3 files changed: +29 -1 lines changed

build/configs/scanners.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -248,6 +248,7 @@ scanners:
248248
flavors:
249249
- 'javascript_file'
250250
- 'text/javascript'
251+
- 'application/ecmascript'
251252
priority: 5
252253
options:
253254
beautify: True

src/python/strelka/scanners/scan_html.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,7 @@ def scan(self, data, file, options, expire_at):
106106
script_flavors = [
107107
script.get('language', '').lower(),
108108
script.get('type', '').lower(),
109+
'text/javascript',
109110
]
110111
script_entry = {
111112
'src': script.get('src'),

src/python/strelka/scanners/scan_xml.py

Lines changed: 27 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@
66
class ScanXml(strelka.Scanner):
77
"""Collects metadata and extracts embedded files from XML files.
88
9+
Extracts JavaScript content from script tags and emits them as child files.
10+
911
Options:
1012
extract_tags: List of XML tags that will have their text extracted
1113
as child files.
@@ -23,7 +25,7 @@ def scan(self, data, file, options, expire_at):
2325
self.event.setdefault('tags', [])
2426
self.event.setdefault('tag_data', [])
2527
self.event.setdefault('namespaces', [])
26-
self.event['total'] = {'tags': 0, 'extracted': 0}
28+
self.event['total'] = {'tags': 0, 'scripts': 0, 'extracted': 0}
2729

2830
xml = None
2931
try:
@@ -89,6 +91,30 @@ def _recurse_node(self, node, xml_args):
8991
self.files.append(extract_file)
9092
self.event['total']['extracted'] += 1
9193

94+
# Check for script tags and extract JavaScript content
95+
if tag == 'script':
96+
self.event['total']['scripts'] += 1
97+
98+
if text and text.strip():
99+
extract_file = strelka.File(
100+
name=f'script_{self.event["total"]["scripts"]-1}',
101+
source=self.name,
102+
)
103+
script_flavors = [
104+
node.attrib.get('type', '').lower(),
105+
]
106+
extract_file.add_flavors({'external': script_flavors})
107+
108+
for c in strelka.chunk_string(text):
109+
self.upload_to_coordinator(
110+
extract_file.pointer,
111+
c,
112+
self.expire_at,
113+
)
114+
115+
self.files.append(extract_file)
116+
self.event['total']['extracted'] += 1
117+
92118
for child in node.getchildren():
93119
self._recurse_node(self, child, xml_args)
94120

0 commit comments

Comments
 (0)