Skip to content

Commit 949e886

Browse files
committed
Move from TheHive-Project/Cortex-Analyzers/contrib
1 parent f926229 commit 949e886

18 files changed

+973
-0
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,3 +102,6 @@ venv.bak/
102102

103103
# mypy
104104
.mypy_cache/
105+
106+
# PyCharm
107+
.idea

MANIFEST.in

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
include *.py
2+
include LICENSE
3+
exclude requirements.txt
4+
exclude README.md
5+
recursive-include cortexutils *.py

README

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Cortexutils is a set of classes that aims to make it easier for users to write Cortex analyzers.

cortexutils/__init__.py

Whitespace-only changes.

cortexutils/analyzer.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
#!/usr/bin/env python
2+
# encoding: utf-8
3+
4+
import json
5+
from cortexutils.worker import Worker
6+
from cortexutils.extractor import Extractor
7+
8+
9+
class Analyzer(Worker):
    """Base class for Cortex analyzers, built on top of ``Worker``.

    Adds observable access (``get_data``), taxonomy building, optional
    automatic artifact extraction and JSON report output. Subclasses are
    expected to override ``run()`` and may override ``summary()`` and
    ``artifacts()``.
    """

    def __init__(self):
        Worker.__init__(self)

        # Not breaking compatibility
        self.artifact = self._input

        # Check for auto extraction config. 'config.auto_extract' wins; the
        # older 'config.auto_extract_artifacts' key is only used as its
        # default, and extraction is enabled when neither key is set.
        self.auto_extract = self.get_param(
            'config.auto_extract',
            self.get_param('config.auto_extract_artifacts', True)
        )

    def get_data(self):
        """Wrapper for getting data from input dict.

        For observables of type ``file`` the ``filename`` parameter is
        returned, otherwise the ``data`` parameter.

        :return: Data (observable value) given through Cortex"""
        if self.data_type == 'file':
            return self.get_param('filename', None, 'Missing filename.')
        return self.get_param('data', None, 'Missing data field')

    def build_taxonomy(self, level, namespace, predicate, value):
        """Build a single taxonomy entry.

        :param level: info, safe, suspicious or malicious
        :param namespace: Name of analyzer
        :param predicate: Name of service
        :param value: value
        :return: dict with the keys level, namespace, predicate and value
        """
        # Set info level if something not expected is set
        if level not in ['info', 'safe', 'suspicious', 'malicious']:
            level = 'info'
        return {
            'level': level,
            'namespace': namespace,
            'predicate': predicate,
            'value': value
        }

    def summary(self, raw):
        """Returns a summary, needed for 'short.html' template. Overwrite it for your needs!

        :param raw: The full report, as passed to ``report()``.
        :returns: by default return an empty dict"""
        return {}

    def artifacts(self, raw):
        """Extract artifacts from the full report.

        :param raw: The full report, as passed to ``report()``.
        :return: List of {type, value} dicts found by the regex extractor,
                 or an empty list when auto extraction is disabled.
        """
        # Use the regex extractor, if auto_extract setting is not False.
        # The analyzed observable itself is ignored so it is not reported
        # back as its own artifact.
        if self.auto_extract:
            extractor = Extractor(ignore=self.get_data())
            return extractor.check_iterable(raw)

        # Return empty list
        return []

    def report(self, full_report, ensure_ascii=False):
        """Returns a json dict via stdout.

        :param full_report: Analyzer results as dict.
        :param ensure_ascii: Force ascii output. Default: False"""

        # Building the summary is best effort: a failing summary() must not
        # prevent the full report from being written.
        summary = {}
        try:
            summary = self.summary(full_report)
        except Exception:
            pass

        report = {
            'success': True,
            'summary': summary,
            'artifacts': self.artifacts(full_report),
            'full': full_report
        }
        json.dump(report, self.fpoutput, ensure_ascii=ensure_ascii)

    def run(self):
        """Overwritten by analyzers"""
        pass

    # Not breaking compatibility
    def notSupported(self):
        """Report that the observable's data type is not supported."""
        self.error('This datatype is not supported by this analyzer.')

    # Not breaking compatibility
    def unexpectedError(self, e):
        """Report an unexpected error.

        :param e: The exception (or any object) to include in the message.
        """
        self.error('Unexpected Error: ' + str(e))

    # Not breaking compatibility
    def getData(self):
        """For not breaking compatibility to cortexutils.analyzer, this wraps get_data()"""
        return self.get_data()

    # Not breaking compatibility
    def getParam(self, name, default=None, message=None):
        """For not breaking compatibility to cortexutils.analyzer, this wraps get_param()"""
        return self.get_param(name=name, default=default, message=message)

    # Not breaking compatibility
    def checkTlp(self, message):
        """Report ``message`` as an error if the TLP check fails.

        :param message: Error message passed to ``error()``.
        """
        # Writing ``self.__check_tlp()`` here would be name-mangled to
        # ``_Analyzer__check_tlp``, which this class does not define, and
        # would always raise AttributeError. The mangled name of the base
        # class is used instead.
        # NOTE(review): assumes Worker defines the private method
        # ``__check_tlp`` - confirm against cortexutils/worker.py.
        if not self._Worker__check_tlp():
            self.error(message)

cortexutils/extractor.py

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
#!/usr/bin/env python
2+
from builtins import str as unicode
3+
4+
import re
5+
6+
7+
class ExtractionError(Exception):
    """Exception type declared for errors during artifact extraction."""
9+
10+
11+
class Extractor:
    """
    The extractor class tries to detect ioc attribute types using regex-matching. Two functions are provided:
      - ``check_string(str)`` which checks a string for a regex match and just returns the type
      - ``check_iterable(itr)`` that iterates over a list or a dictionary and returns a list of {type, value} dicts

    Currently, this is not a fulltext search, so the ioc's must be isolated strings, to get found.
    This can be iterated for ioc's.

    :param ignore: List of strings or a single string to ignore when matching artifacts to type
    :type ignore: list, str
    """

    def __init__(self, ignore=None):
        self.ignore = ignore
        self.regex = self.__init_regex()

    @staticmethod
    def __init_regex():
        """
        Returns compiled regex list.

        The order of the list matters: the first matching entry decides the
        type of a value.

        :return: List of {type, regex} dicts
        :rtype: list
        """

        # IPv4
        regex = [{
            'type': 'ip',
            'regex': re.compile(r'[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}')
        }]

        # IPv6
        # RegEx from https://stackoverflow.com/questions/53497/regular-expression-that-matches-valid-ipv6-addresses
        # Raw literals keep the pattern unchanged while avoiding the invalid
        # '\.' escape sequence of a plain string literal.
        ipv6 = (
            r'('
            r'([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|'
            r'([0-9a-fA-F]{1,4}:){1,7}:|'
            r'([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|'
            r'([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|'
            r'([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|'
            r'([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|'
            r'([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|'
            r'[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|'
            r':((:[0-9a-fA-F]{1,4}){1,7}|:)|'
            r'fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|'
            r'::(ffff(:0{1,4}){0,1}:){0,1}'
            r'((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}'
            r'(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|'
            r'([0-9a-fA-F]{1,4}:){1,4}:'
            r'((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}'
            r'(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])'
            r')'
        )
        regex.append({
            'type': 'ip',
            'regex': re.compile(ipv6)
        })

        # URL
        regex.append({
            'type': 'url',
            'regex': re.compile(r'^(http://|https://)')
        })

        # domain
        regex.append({
            'type': 'domain',
            'regex': re.compile(r'^(?!http://|https://)^[\w\-]+\.[a-zA-Z]+$')
        })

        # hash
        regex.append({
            'type': 'hash',
            'regex': re.compile(r'^([0-9a-fA-F]{32}|[0-9a-fA-F]{40}|[0-9a-fA-F]{64})$')
        })

        # user-agent
        regex.append({
            'type': 'user-agent',
            'regex': re.compile(r'^(Mozilla/[45]\.0 |AppleWebKit/[0-9]{3}\.[0-9]{2} |Chrome/[0-9]{2}\.[0-9]\.'
                                r'[0-9]{4}\.[0-9]{3} |Safari/[0-9]{3}\.[0-9]{2} ).*?$')
        })

        # uri_path
        regex.append({
            'type': 'uri_path',
            'regex': re.compile(r'^(?!http://|https://)[A-Za-z]*://')
        })

        # regkey
        regex.append({
            'type': 'registry',
            'regex': re.compile(r'^(HKEY|HKLM|HKCU|HKCR|HKCC)'
                                r'(_LOCAL_MACHINE|_CURRENT_USER|_CURRENT_CONFIG|_CLASSES_ROOT|)[\\a-zA-Z0-9]+$')
        })

        # mail
        regex.append({
            'type': 'mail',
            'regex': re.compile(r'[\w.\-]+@\w+\.[\w.]+')
        })

        # fqdn
        regex.append({
            'type': 'fqdn',
            'regex': re.compile(r'^(?!http://|https://)^[\w\-.]+\.[\w\-]+\.[a-zA-Z]+$')
        })

        return regex

    def __is_ignored(self, value):
        """Tells whether a string value contains one of the ignore entries.

        ``self.ignore`` may be a single string or a list/tuple/set of
        strings. Empty and non-string entries are skipped.

        :param value: The string to test
        :type value: str
        :return: True if any ignore entry is a substring of value
        :rtype: bool
        """
        ignore = self.ignore
        if not ignore:
            return False
        if isinstance(ignore, (str, unicode)):
            ignore = [ignore]
        elif not isinstance(ignore, (list, tuple, set, frozenset)):
            return False
        return any(
            isinstance(entry, (str, unicode)) and entry and entry in value
            for entry in ignore
        )

    def __checktype(self, value):
        """Checks if the given value is a known datatype

        :param value: The value to check
        :type value: str or number
        :return: Data type of value, if known, else empty string
        :rtype: str
        """
        # Only strings can match a regex; everything else has no type.
        if not isinstance(value, (str, unicode)):
            return ''

        if self.__is_ignored(value):
            return ''

        # First matching regex wins, see __init_regex() for the order.
        for r in self.regex:
            if r.get('regex').match(value):
                return r.get('type')
        return ''

    def check_string(self, value):
        """
        Checks if a string matches a datatype.

        :param value: String to test
        :type value: str
        :return: Data type or empty string
        :rtype: str
        """
        return self.__checktype(value)

    def check_iterable(self, iterable):
        """
        Checks values of a list or a dict on ioc's. Returns a list of dict {type, value}. Raises TypeError, if iterable
        is not an expected type.

        Nested lists and dicts are searched recursively; dict keys are not
        checked, only dict values.

        :param iterable: List or dict of values
        :type iterable: list dict str
        :return: List of ioc's matching the regex
        :rtype: list
        """
        # Normalise the three supported inputs to a sequence of items.
        if isinstance(iterable, (str, unicode)):
            items = [iterable]
        elif isinstance(iterable, list):
            items = iterable
        elif isinstance(iterable, dict):
            items = iterable.values()
        else:
            raise TypeError('Not supported type.')

        results = []
        for item in items:
            if isinstance(item, (list, dict)):
                results.extend(self.check_iterable(item))
                continue
            dt = self.__checktype(item)
            if dt:
                results.append({
                    'type': dt,
                    'value': item
                })
        return results

cortexutils/responder.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#!/usr/bin/env python
2+
# encoding: utf-8
3+
4+
import json
5+
from cortexutils.worker import Worker
6+
7+
8+
class Responder(Worker):
    """Base class for Cortex responders, built on top of ``Worker``.

    Subclasses are expected to override ``run()`` and may override
    ``operations()``.
    """

    def __init__(self):
        Worker.__init__(self)

        # Kept as an alias of the input for backwards compatibility
        self.artifact = self._input

    def get_data(self):
        """Fetch the ``data`` parameter from the input.

        :return: Data (observable value) given through Cortex"""
        return self.get_param('data', None, 'Missing data field')

    @staticmethod
    def build_operation(op_type, **parameters):
        """Build a single operation description.

        :param op_type: an operation type as a string
        :param parameters: a dict including the operation's params
        :return: dict holding ``type`` plus all given parameters
        """
        # 'type' comes first; keyword parameters are merged on top of it.
        return dict({'type': op_type}, **parameters)

    def operations(self, raw):
        """Returns the list of operations to be executed after the job completes

        :param raw: The full report, as passed to ``report()``.
        :returns: by default return an empty array"""
        return []

    def report(self, full_report, ensure_ascii=False):
        """Returns a json dict via stdout.

        :param full_report: Responder results as dict.
        :param ensure_ascii: Force ascii output. Default: False"""

        # Collecting operations is best effort: on any failure the report is
        # still written, with an empty operation list.
        try:
            pending = self.operations(full_report)
        except Exception:
            pending = []

        payload = {
            'success': True,
            'full': full_report,
            'operations': pending
        }
        json.dump(payload, self.fpoutput, ensure_ascii=ensure_ascii)

    def run(self):
        """Overwritten by responders"""
        pass

0 commit comments

Comments
 (0)